LU-6209 lnet: Delete all obsolete LND drivers 63/13663/4
author James Simmons <uja.ornl@gmail.com>
Tue, 10 Feb 2015 02:28:45 +0000 (21:28 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Sun, 8 Mar 2015 11:47:24 +0000 (11:47 +0000)
Remove the ralnd, mxlnd, and qswlnd drivers. They are no
longer supported and have not been buildable for a long
time.

Change-Id: I9c88b446028e79122b5847448fdd23fb6cb5c530
Signed-off-by: James Simmons <uja.ornl@gmail.com>
Reviewed-on: http://review.whamcloud.com/13663
Tested-by: Jenkins
Reviewed-by: Isaac Huang <he.huang@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
30 files changed:
libcfs/include/libcfs/libcfs_ioctl.h
lnet/autoconf/lustre-lnet.m4
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lnetctl.h
lnet/klnds/Makefile.in
lnet/klnds/autoMakefile.am
lnet/klnds/mxlnd/Makefile.in [deleted file]
lnet/klnds/mxlnd/README [deleted file]
lnet/klnds/mxlnd/autoMakefile.am [deleted file]
lnet/klnds/mxlnd/mxlnd.c [deleted file]
lnet/klnds/mxlnd/mxlnd.h [deleted file]
lnet/klnds/mxlnd/mxlnd_cb.c [deleted file]
lnet/klnds/mxlnd/mxlnd_modparams.c [deleted file]
lnet/klnds/qswlnd/Makefile.in [deleted file]
lnet/klnds/qswlnd/autoMakefile.am [deleted file]
lnet/klnds/qswlnd/qswlnd.c [deleted file]
lnet/klnds/qswlnd/qswlnd.h [deleted file]
lnet/klnds/qswlnd/qswlnd_cb.c [deleted file]
lnet/klnds/qswlnd/qswlnd_modparams.c [deleted file]
lnet/klnds/ralnd/Makefile.in [deleted file]
lnet/klnds/ralnd/autoMakefile.am [deleted file]
lnet/klnds/ralnd/ralnd.c [deleted file]
lnet/klnds/ralnd/ralnd.h [deleted file]
lnet/klnds/ralnd/ralnd_cb.c [deleted file]
lnet/klnds/ralnd/ralnd_modparams.c [deleted file]
lnet/lnet/config.c
lnet/utils/debug.c
lnet/utils/portals.c
lustre/doc/lctl.8
lustre/utils/lctl.c

index d8206fb..6d8781f 100644
@@ -150,7 +150,7 @@ struct libcfs_ioctl_handler {
 #define IOC_LIBCFS_DEL_PEER               _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
 #define IOC_LIBCFS_ADD_PEER               _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
 #define IOC_LIBCFS_GET_PEER               _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
-#define IOC_LIBCFS_GET_TXDESC             _IOWR('e', 77, IOCTL_LIBCFS_TYPE)
+/* ioctl 77 is free for use */
 #define IOC_LIBCFS_ADD_INTERFACE          _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
 #define IOC_LIBCFS_DEL_INTERFACE          _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
 #define IOC_LIBCFS_GET_INTERFACE          _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
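For context, libcfs ioctls are ordinary Linux _IOWR() numbers in the 'e' namespace, so retiring IOC_LIBCFS_GET_TXDESC simply frees slot 77 for later reuse. A minimal sketch of what such a reuse could look like (the IOC_LIBCFS_EXAMPLE name is hypothetical and not part of this change):

    /* hypothetical future ioctl reusing the freed slot 77 */
    #define IOC_LIBCFS_EXAMPLE                _IOWR('e', 77, IOCTL_LIBCFS_TYPE)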
index 3621261..5d57ff8 100644
@@ -146,125 +146,6 @@ AC_DEFUN([LN_CONFIG_DLC], [
 ])
 
 #
-# LN_CONFIG_QUADRICS
-#
-# check if quadrics support is in this kernel
-#
-AC_DEFUN([LN_CONFIG_QUADRICS], [
-AC_MSG_CHECKING([for QsNet sources])
-AC_ARG_WITH([qsnet],
-       AC_HELP_STRING([--with-qsnet=path],
-               [set path to qsnet source (default=$LINUX)]),
-       [QSNET=$with_qsnet], [QSNET=$LINUX])
-AC_MSG_RESULT([$QSNET])
-
-QSWLND=""
-QSWCPPFLAGS=""
-AC_MSG_CHECKING([if quadrics kernel headers are present])
-AS_IF([test -d $QSNET/drivers/net/qsnet], [
-       AC_MSG_RESULT([yes])
-       QSWLND="qswlnd"
-       AC_MSG_CHECKING([for multirail EKC])
-       AS_IF([test -f $QSNET/include/elan/epcomms.h], [
-               AC_MSG_RESULT([supported])
-               QSNET=$(readlink --canonicalize $QSNET)
-               QSWCPPFLAGS="-I$QSNET/include -DMULTIRAIL_EKC=1"
-       ], [
-               AC_MSG_RESULT([not supported])
-               AC_MSG_ERROR([Need multirail EKC])
-       ])
-
-       AS_IF([test x$QSNET = x$LINUX], [
-               LB_CHECK_CONFIG([QSNET], [], [
-                       LB_CHECK_CONFIG([QSNET_MODULE], [], [
-                               AC_MSG_WARN([QSNET is not enabled in this kernel; not building qswlnd.])
-                               QSWLND=""
-                               QSWCPPFLAGS=""
-                       ])
-               ])
-       ])
-], [
-       AC_MSG_RESULT([no])
-])
-AC_SUBST(QSWLND)
-AC_SUBST(QSWCPPFLAGS)
-]) # LN_CONFIG_QUADRICS
-
-#
-# LN_CONFIG_MX
-#
-AC_DEFUN([LN_CONFIG_MX], [
-# set default
-MXPATH="/opt/mx"
-AC_MSG_CHECKING([whether to enable Myrinet MX support])
-AC_ARG_WITH([mx],
-       AC_HELP_STRING([--with-mx=path],
-               [build mxlnd against path]),
-       [
-               case $with_mx in
-               yes) ENABLEMX=2 ;;
-               no)  ENABLEMX=0 ;;
-               *)   ENABLEMX=3; MXPATH=$with_mx ;;
-               esac
-       ],[
-               ENABLEMX=1
-       ])
-AS_IF([test $ENABLEMX -eq 0], [
-       AC_MSG_RESULT([disabled])
-], [test ! \( -f ${MXPATH}/include/myriexpress.h -a \
-             -f ${MXPATH}/include/mx_kernel_api.h -a \
-             -f ${MXPATH}/include/mx_pin.h \)], [
-       AC_MSG_RESULT([no])
-       case $ENABLEMX in
-       1) ;;
-       2) AC_MSG_ERROR([Myrinet MX kernel headers not present]) ;;
-       3) AC_MSG_ERROR([bad --with-mx path]) ;;
-       *) AC_MSG_ERROR([internal error]) ;;
-       esac
-], [
-       AC_MSG_RESULT([check])
-       MXPATH=$(readlink --canonicalize $MXPATH)
-       MXCPPFLAGS="-I$MXPATH/include"
-       MXLIBS="-L$MXPATH/lib"
-       EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS $MXCPPFLAGS"
-       LB_CHECK_COMPILE([if have Myrinet MX support],
-       myrinet_mx_support, [
-               #define MX_KERNEL 1
-               #include <mx_extensions.h>
-               #include <myriexpress.h>
-       ],[
-               mx_endpoint_t   end;
-               mx_status_t     status;
-               mx_request_t    request;
-               int             result;
-               mx_init();
-               mx_open_endpoint(MX_ANY_NIC, MX_ANY_ENDPOINT, 0, NULL, 0, &end);
-               mx_register_unexp_handler(end, (mx_unexp_handler_t) NULL, NULL);
-               mx_wait_any(end, MX_INFINITE, 0LL, 0LL, &status, &result);
-               mx_iconnect(end, 0LL, 0, 0, 0, NULL, &request);
-               return 0;
-       ],[
-               MXLND="mxlnd"
-       ],[
-               case $ENABLEMX in
-               1) ;;
-               2) AC_MSG_ERROR([can't compile with Myrinet MX kernel headers]) ;;
-               3) AC_MSG_ERROR([can't compile with Myrinet MX headers under $MXPATH]) ;;
-               *) AC_MSG_ERROR([internal error]) ;;
-               esac
-               MXCPPFLAGS=""
-               MXLIBS=""
-               MXLND=""
-       ])
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-])
-AC_SUBST(MXCPPFLAGS)
-AC_SUBST(MXLIBS)
-AC_SUBST(MXLND)
-]) # LN_CONFIG_MX
-
-#
 # LN_CONFIG_O2IB
 #
 AC_DEFUN([LN_CONFIG_O2IB], [
@@ -437,35 +318,6 @@ AS_IF([test $ENABLEO2IB -ne 0], [
 ]) # LN_CONFIG_O2IB
 
 #
-# LN_CONFIG_RALND
-#
-# check whether to use the RapidArray lnd
-#
-AC_DEFUN([LN_CONFIG_RALND], [
-RALND=""
-RACPPFLAGS="-I${LINUX}/drivers/xd1/include"
-EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
-EXTRA_KCFLAGS="$EXTRA_KCFLAGS $RACPPFLAGS"
-LB_CHECK_COMPILE([if 'RapidArray' kernel headers are present],
-RapkGetDeviceByIndex, [
-       #include <linux/types.h>
-       #include <rapl.h>
-],[
-       RAP_RETURN rc;
-       RAP_PVOID  dev_handle;
-       rc = RapkGetDeviceByIndex(0, NULL, &dev_handle);
-       return rc == RAP_SUCCESS ? 0 : 1;
-],[
-       RALND="ralnd"
-],[
-       RACPPFLAGS=""
-])
-EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-AC_SUBST(RACPPFLAGS)
-AC_SUBST(RALND)
-]) # LN_CONFIG_RALND
-
-#
 # LN_CONFIG_GNILND
 #
 # check whether to use the Gemini Network Interface lnd
@@ -581,11 +433,8 @@ AC_MSG_NOTICE([LNet kernel checks
 LN_FUNC_DEV_GET_BY_NAME_2ARG
 LN_CONFIG_AFFINITY
 LN_CONFIG_BACKOFF
-LN_CONFIG_QUADRICS
 LN_CONFIG_O2IB
-LN_CONFIG_RALND
 LN_CONFIG_GNILND
-LN_CONFIG_MX
 # 2.6.36
 LN_CONFIG_TCP_SENDPAGE
 # 3.15
@@ -673,10 +522,7 @@ LN_CONFIG_DLC
 # AM_CONDITOINAL defines for lnet
 #
 AC_DEFUN([LN_CONDITIONALS], [
-AM_CONDITIONAL(BUILD_QSWLND,     test x$QSWLND = "xqswlnd")
-AM_CONDITIONAL(BUILD_MXLND,      test x$MXLND = "xmxlnd")
 AM_CONDITIONAL(BUILD_O2IBLND,    test x$O2IBLND = "xo2iblnd")
-AM_CONDITIONAL(BUILD_RALND,      test x$RALND = "xralnd")
 AM_CONDITIONAL(BUILD_GNILND,     test x$GNILND = "xgnilnd")
 AM_CONDITIONAL(BUILD_GNILND_RCA, test x$GNILNDRCA = "xgnilndrca")
 AM_CONDITIONAL(BUILD_DLC,        test x$USE_DLC = "xyes")
@@ -697,14 +543,8 @@ lnet/include/Makefile
 lnet/include/lnet/Makefile
 lnet/klnds/Makefile
 lnet/klnds/autoMakefile
-lnet/klnds/mxlnd/autoMakefile
-lnet/klnds/mxlnd/Makefile
 lnet/klnds/o2iblnd/Makefile
 lnet/klnds/o2iblnd/autoMakefile
-lnet/klnds/qswlnd/Makefile
-lnet/klnds/qswlnd/autoMakefile
-lnet/klnds/ralnd/Makefile
-lnet/klnds/ralnd/autoMakefile
 lnet/klnds/gnilnd/Makefile
 lnet/klnds/gnilnd/autoMakefile
 lnet/klnds/socklnd/Makefile
index a78da4f..3daa92c 100644
@@ -692,7 +692,6 @@ void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd);
 
 void lnet_register_lnd(lnd_t *lnd);
 void lnet_unregister_lnd(lnd_t *lnd);
-int lnet_set_ip_niaddr (lnet_ni_t *ni);
 
 int lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid,
                  __u32 local_ip, __u32 peer_ip, int peer_port);
index 4daff78..be19661 100644
@@ -152,7 +152,6 @@ int jt_ptl_del_peer (int argc, char **argv);
 int jt_ptl_print_connections (int argc, char **argv);
 int jt_ptl_disconnect(int argc, char **argv);
 int jt_ptl_push_connection(int argc, char **argv);
-int jt_ptl_print_active_txs(int argc, char **argv);
 int jt_ptl_ping(int argc, char **argv);
 int jt_ptl_mynid(int argc, char **argv);
 int jt_ptl_add_uuid(int argc, char **argv);
index ad17897..6fc3744 100644
@@ -1,8 +1,5 @@
-@BUILD_MXLND_TRUE@subdir-m += mxlnd
-@BUILD_RALND_TRUE@subdir-m += ralnd
 @BUILD_GNILND_TRUE@subdir-m += gnilnd
 @BUILD_O2IBLND_TRUE@subdir-m += o2iblnd
-@BUILD_QSWLND_TRUE@subdir-m += qswlnd
 subdir-m += socklnd
 
 @INCLUDE_RULES@
index 78eb985..ce24433 100644
@@ -34,4 +34,4 @@
 # Lustre is a trademark of Sun Microsystems, Inc.
 #
 
-SUBDIRS = socklnd qswlnd mxlnd ralnd gnilnd o2iblnd
+SUBDIRS = socklnd gnilnd o2iblnd
diff --git a/lnet/klnds/mxlnd/Makefile.in b/lnet/klnds/mxlnd/Makefile.in
deleted file mode 100644
index 378dbdd..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := kmxlnd
-kmxlnd-objs := mxlnd.o mxlnd_cb.o mxlnd_modparams.o
-
-EXTRA_POST_CFLAGS := @MXCPPFLAGS@
-
-@INCLUDE_RULES@
diff --git a/lnet/klnds/mxlnd/README b/lnet/klnds/mxlnd/README
deleted file mode 100644
index 7467b42..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-*************************************************************************
-*                                                                       *
-*    Myrinet Express Lustre Networking Driver (MXLND) documentation     *
-*                                                                       *
-*************************************************************************
-
-README of MXLND
-
-MXLND provides support for Myricom's Myrinet Express (MX) communication
-layer in Lustre.
-
-MXLND may be used with either MX-10G or MX-2G. See MX's README for
-supported NICs.
-
-Table of Contents:
-    I. Installation
-       1. Configuring and compiling
-       2. Module Parameters
-   II. MXLND Performance
-  III. Caveats
-       1. Systems with different page sizes
-       2. Multi-homing
-       3. MX endpoint collision
-   IV. License
-    V. Support
-
-================
-I. Installation
-================
-
-MXLND is supported on Linux 2.6. It may be possible to run it on 2.4,
-but it has not been tested. MXLND requires Myricom's MX version 1.2.8
-or higher. See MX's README for the supported list of processors.
-
-MXLND requires the optional MX kernel library interface. MX must be compiled
-with --enable-kernel-lib.
-
-1. Configuring and compiling
-
-MXLND should be already integrated into the Lustre build process. To 
-build MXLND, you will need to set the path to your MX installation
-in Lustre's ./configure:
-
-    --with-mx=/opt/mx
-
-replacing /opt with the actual path. Configure will check to ensure that
-the MX version has the required functions. If not, it will fail to build.
-To check if MXLND built, look for:
-
-    checking whether to enable Myrinet MX support... yes
-
-in configure's output or the presence of Makefile in
-$LUSTRE/lnet/klnds/mxlnd.
-
-2. Module Parameters
-
-MXLND supports a number of load-time parameters using Linux's module
-parameter system. On our test systems, we created the following file:
-
-    /etc/modprobe.d/kmxlnd
-
-On some (older?) systems, you may need to modify /etc/modprobe.conf.
-
-The available options are:
-
-    n_waitd    # of completion daemons
-    cksum      set non-zero to enable small message (< 4KB) checksums
-    ntx                # of total tx message descriptors
-    peercredits        # concurrent sends to one peer
-    board      index value of the Myrinet board
-    ep_id      MX endpoint ID
-    ipif_name  IPoMX interface name
-    polling    Use 0 to block (wait). A value > 0 will poll that many times before blocking
-
-    credits    Unused - was # concurrent sends to all peers
-    max_peers  Unused - was maximum number of peers that may connect
-    hosts      Unused - was IP-to-hostname resolution file
-
-You may want to vary the options to obtain the optimal performance for your
-platform.
-
-    n_waitd sets the number of threads that process completed MX requests
-(sends and receives). In our testing, the default of 1 performed best.
-
-    cksum turns on small message checksums. It can be used to aid in trouble-
-shooting. MX also provides an optional checksumming feature which can check 
-all messages (large and small). See the MX README for details.
-
-    ntx is the number of total sends in flight from this machine.
-
-    peercredits is the number of in-flight messages for a specific peer. This is part
-of the flow-control system in Lustre. Increasing this value may improve performance
-but it requires more memory since each message requires at least one page.
-
-    board is the index of the Myricom NIC. Hosts can have multiple Myricom NICs
-and this identifies which one MXLND should use.
-
-    ep_id is the MX endpoint ID. Each process that uses MX is required to have at
-least one MX endpoint to access the MX library and NIC. The ID is a simple index
-starting at 0. When used on a server, the server will attempt to use this end-
-point. When used on a client, it specifies the endpoint to connect to on the 
-management server.
-
-    ipif_name is the name of the Ethernet interface over MX. Generally, it is
-myriN, where N matches the MX board index.
-
-    polling determines whether this host will poll or block for MX request com-
-pletions. A value of 0 blocks and any positive value will poll that many times
-before blocking. Since polling increases CPU usage, we suggest you set this to
-0 on the client and experiment with different values for servers.
-
-=====================
-II. MXLND Performance
-=====================
-
-On MX-2G systems, MXLND should easily saturate the link and use minimal CPU 
-(5-10% for read and write operations). On MX-10G systems, MXLND can saturate 
-the link and use moderate CPU resources (20-30% for read and write operations).
-MX-10G relies on PCI-Express which is relatively new and performance varies
-considerably by processor, motherboard and PCI-E chipset. Refer to Myricom's
-website for the latest DMA read/write performance results by motherboard. The
-DMA results will place an upper-bound on MXLND performance.
-
-============
-III. Caveats
-============
-
-1. Systems with different page sizes
-
-MXLND will set the maximum small message size equal to the kernel's page size.
-This means that machines running MXLND that have different page sizes are not
-able to communicate with each other. If you wish to run MXLND in this case,
-send email to help@myri.com.
-
-2. Multi-homing
-
-At this time, the MXLND does not support more than one interface at a time.
-Thus, a single Lustre router cannot route between two MX-10G, between two
-MX-2G, or between MX-10G and MX-2G fabrics.
-
-3. MX endpoint collision
-
-Each process that uses MX is required to have at least one MX endpoint to
-access the MX library and NIC. Other processes may need to use MX and no two
-processes can use the same endpoint ID.  MPICH-MX dynamically chooses one at
-MPI startup and should not interfere with MXLND. Sockets-MX, on the other hand,
-is hard coded to use 0 for its ID. If it is possible that anyone will want to
-run Sockets-MX on this system, use a non-0 value for MXLND's endpoint ID.
-
-
-===========
-IV. License
-===========
-
-MXLND is copyright (C) 2006 of Myricom, Inc. 
-
-MXLND is part of Lustre, http://www.lustre.org.
-
-MXLND is free software; you can redistribute it and/or modify it under the
-terms of version 2 of the GNU General Public License as published by the Free
-Software Foundation.
-
-MXLND is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE.  See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.
-
-==========
-V. Support
-==========
-
-If you have questions about MXLND, please contact help@myri.com.
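The deleted README above is the only prose documentation of MXLND's build and load-time configuration; for reference, a typical setup implied by that text would have looked roughly like the sketch below (paths and values are illustrative only, drawn from the README's own examples and defaults):

    # build Lustre against an MX installation (path is an example)
    ./configure --with-mx=/opt/mx

    # /etc/modprobe.d/kmxlnd -- illustrative load-time parameters
    options kmxlnd n_waitd=1 polling=0 peercredits=8 ep_id=3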
diff --git a/lnet/klnds/mxlnd/autoMakefile.am b/lnet/klnds/mxlnd/autoMakefile.am
deleted file mode 100644
index e5efec3..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# GPL HEADER START
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 only,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License version 2 for more details (a copy is included
-# in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU General Public License
-# version 2 along with this program; If not, see
-# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-#
-# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-# CA 95054 USA or visit www.sun.com if you need additional information or
-# have any questions.
-#
-# GPL HEADER END
-#
-
-#
-# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# This file is part of Lustre, http://www.lustre.org/
-# Lustre is a trademark of Sun Microsystems, Inc.
-#
-
-if MODULES
-if BUILD_MXLND
-modulenet_DATA = kmxlnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-EXTRA_DIST = $(kmxlnd-objs:%.o=%.c) mxlnd.h
diff --git a/lnet/klnds/mxlnd/mxlnd.c b/lnet/klnds/mxlnd/mxlnd.c
deleted file mode 100644
index 0b494c9..0000000
+++ /dev/null
@@ -1,715 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- *
- * Copyright (C) 2006 Myricom, Inc.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/mxlnd/mxlnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Scott Atchley <atchley at myri.com>
- */
-
-#include "mxlnd.h"
-
-lnd_t the_kmxlnd = {
-        .lnd_type       = MXLND,
-        .lnd_startup    = mxlnd_startup,
-        .lnd_shutdown   = mxlnd_shutdown,
-        .lnd_ctl        = mxlnd_ctl,
-        .lnd_send       = mxlnd_send,
-        .lnd_recv       = mxlnd_recv,
-};
-
-kmx_data_t               kmxlnd_data;
-
-void
-mxlnd_free_pages(kmx_pages_t *p)
-{
-        int     npages = p->mxg_npages;
-        int     i;
-
-        CDEBUG(D_MALLOC, "freeing %d pages\n", npages);
-
-        for (i = 0; i < npages; i++) {
-                if (p->mxg_pages[i] != NULL) {
-                        __free_page(p->mxg_pages[i]);
-                       spin_lock(&kmxlnd_data.kmx_mem_lock);
-                       kmxlnd_data.kmx_mem_used -= PAGE_SIZE;
-                       spin_unlock(&kmxlnd_data.kmx_mem_lock);
-                }
-        }
-
-        MXLND_FREE(p, offsetof(kmx_pages_t, mxg_pages[npages]));
-}
-
-int
-mxlnd_alloc_pages(kmx_pages_t **pp, int npages)
-{
-        kmx_pages_t    *p       = NULL;
-        int             i       = 0;
-
-        CDEBUG(D_MALLOC, "allocing %d pages\n", npages);
-
-        MXLND_ALLOC(p, offsetof(kmx_pages_t, mxg_pages[npages]));
-        if (p == NULL) {
-                CERROR("Can't allocate descriptor for %d pages\n", npages);
-                return -ENOMEM;
-        }
-
-        memset(p, 0, offsetof(kmx_pages_t, mxg_pages[npages]));
-        p->mxg_npages = npages;
-
-        for (i = 0; i < npages; i++) {
-                p->mxg_pages[i] = alloc_page(GFP_KERNEL);
-                if (p->mxg_pages[i] == NULL) {
-                        CERROR("Can't allocate page %d of %d\n", i, npages);
-                        mxlnd_free_pages(p);
-                        return -ENOMEM;
-                }
-               spin_lock(&kmxlnd_data.kmx_mem_lock);
-               kmxlnd_data.kmx_mem_used += PAGE_SIZE;
-               spin_unlock(&kmxlnd_data.kmx_mem_lock);
-        }
-
-        *pp = p;
-        return 0;
-}
-
-/**
- * mxlnd_ctx_init - reset ctx struct to the default values
- * @ctx - a kmx_ctx pointer
- */
-void
-mxlnd_ctx_init(kmx_ctx_t *ctx)
-{
-        if (ctx == NULL) return;
-
-        /* do not change mxc_type */
-        ctx->mxc_incarnation = 0;
-        ctx->mxc_deadline = 0;
-        ctx->mxc_state = MXLND_CTX_IDLE;
-        if (!cfs_list_empty(&ctx->mxc_list))
-                cfs_list_del_init(&ctx->mxc_list);
-        /* ignore mxc_rx_list */
-        if (ctx->mxc_type == MXLND_REQ_TX) {
-                ctx->mxc_nid = 0;
-                ctx->mxc_peer = NULL;
-                ctx->mxc_conn = NULL;
-        }
-        /* ignore mxc_msg */
-        ctx->mxc_lntmsg[0] = NULL;
-        ctx->mxc_lntmsg[1] = NULL;
-        ctx->mxc_msg_type = 0;
-        ctx->mxc_cookie = 0LL;
-        ctx->mxc_match = 0LL;
-        /* ctx->mxc_seg.segment_ptr points to backing page */
-        ctx->mxc_seg.segment_length = 0;
-        if (ctx->mxc_seg_list != NULL) {
-                LASSERT(ctx->mxc_nseg > 0);
-                MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t));
-        }
-        ctx->mxc_seg_list = NULL;
-        ctx->mxc_nseg = 0;
-        ctx->mxc_nob = 0;
-        memset(&ctx->mxc_mxreq, 0, sizeof(mx_request_t));
-        memset(&ctx->mxc_status, 0, sizeof(mx_status_t));
-        ctx->mxc_errno = 0;
-        /* ctx->mxc_get */
-        /* ctx->mxc_put */
-
-        ctx->mxc_msg->mxm_type = 0;
-        ctx->mxc_msg->mxm_credits = 0;
-        ctx->mxc_msg->mxm_nob = 0;
-
-        return;
-}
-
-/**
- * mxlnd_free_txs - free kmx_txs and associated pages
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_txs(void)
-{
-        int             i       = 0;
-        kmx_ctx_t       *tx     = NULL;
-
-        if (kmxlnd_data.kmx_tx_pages) {
-                for (i = 0; i < MXLND_TX_MSGS(); i++) {
-                        tx = &kmxlnd_data.kmx_txs[i];
-                        if (tx->mxc_seg_list != NULL) {
-                                LASSERT(tx->mxc_nseg > 0);
-                                MXLND_FREE(tx->mxc_seg_list,
-                                           tx->mxc_nseg *
-                                           sizeof(*tx->mxc_seg_list));
-                        }
-                }
-                MXLND_FREE(kmxlnd_data.kmx_txs,
-                            MXLND_TX_MSGS() * sizeof(kmx_ctx_t));
-                mxlnd_free_pages(kmxlnd_data.kmx_tx_pages);
-        }
-
-        return;
-}
-
-/**
- * mxlnd_init_txs - allocate tx descriptors then stash on txs and idle tx lists
- *
- * Called from mxlnd_startup()
- * returns 0 on success, else -ENOMEM
- */
-int
-mxlnd_init_txs(void)
-{
-        int             ret     = 0;
-        int             i       = 0;
-        int             ipage   = 0;
-        int             offset  = 0;
-        void           *addr    = NULL;
-        kmx_ctx_t      *tx      = NULL;
-        kmx_pages_t    *pages   = NULL;
-        struct page    *page    = NULL;
-
-        /* pre-mapped messages are not bigger than 1 page */
-        CLASSERT(MXLND_MSG_SIZE <= PAGE_SIZE);
-
-        /* No fancy arithmetic when we do the buffer calculations */
-        CLASSERT (PAGE_SIZE % MXLND_MSG_SIZE == 0);
-
-        ret = mxlnd_alloc_pages(&pages, MXLND_TX_MSG_PAGES());
-        if (ret != 0) {
-                CERROR("Can't allocate tx pages\n");
-                return -ENOMEM;
-        }
-        kmxlnd_data.kmx_tx_pages = pages;
-
-        MXLND_ALLOC(kmxlnd_data.kmx_txs, MXLND_TX_MSGS() * sizeof(kmx_ctx_t));
-        if (&kmxlnd_data.kmx_txs == NULL) {
-                CERROR("Can't allocate %d tx descriptors\n", MXLND_TX_MSGS());
-                mxlnd_free_pages(pages);
-                return -ENOMEM;
-        }
-
-        memset(kmxlnd_data.kmx_txs, 0, MXLND_TX_MSGS() * sizeof(kmx_ctx_t));
-
-        for (i = 0; i < MXLND_TX_MSGS(); i++) {
-
-                tx = &kmxlnd_data.kmx_txs[i];
-                tx->mxc_type = MXLND_REQ_TX;
-
-                CFS_INIT_LIST_HEAD(&tx->mxc_list);
-
-                /* map mxc_msg to page */
-                page = pages->mxg_pages[ipage];
-                addr = page_address(page);
-                LASSERT(addr != NULL);
-                tx->mxc_msg = (kmx_msg_t *)(addr + offset);
-                tx->mxc_seg.segment_ptr = MX_PA_TO_U64(virt_to_phys(tx->mxc_msg));
-
-                mxlnd_ctx_init(tx);
-
-                offset += MXLND_MSG_SIZE;
-                LASSERT (offset <= PAGE_SIZE);
-
-                if (offset == PAGE_SIZE) {
-                        offset = 0;
-                        ipage++;
-                        LASSERT (ipage <= MXLND_TX_MSG_PAGES());
-                }
-
-                /* in startup(), no locks required */
-                cfs_list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle);
-        }
-
-        return 0;
-}
-
-/**
- * mxlnd_free_peers - free peers
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_peers(void)
-{
-        int             i      = 0;
-        int             count  = 0;
-        kmx_peer_t     *peer   = NULL;
-        kmx_peer_t     *next   = NULL;
-
-        for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                cfs_list_for_each_entry_safe(peer, next,
-                                             &kmxlnd_data.kmx_peers[i],
-                                             mxp_list) {
-                        cfs_list_del_init(&peer->mxp_list);
-                        if (peer->mxp_conn) mxlnd_conn_decref(peer->mxp_conn);
-                        mxlnd_peer_decref(peer);
-                        count++;
-                }
-        }
-        CDEBUG(D_NET, "%s: freed %d peers\n", __func__, count);
-}
-
-/**
- * mxlnd_init_mx - open the endpoint, set our ID, register the EAGER callback
- * @ni - the network interface
- *
- * Returns 0 on success, -1 on failure
- */
-int
-mxlnd_init_mx(lnet_ni_t *ni)
-{
-        int                     ret     = 0;
-        mx_return_t             mxret;
-        u32                     board   = *kmxlnd_tunables.kmx_board;
-        u32                     ep_id   = *kmxlnd_tunables.kmx_ep_id;
-        u64                     nic_id  = 0LL;
-        char                    *ifname = NULL;
-        __u32                   ip;
-        __u32                   netmask;
-        int                     if_up   = 0;
-
-        mxret = mx_init();
-        if (mxret != MX_SUCCESS) {
-                CERROR("mx_init() failed with %s (%d)\n", mx_strerror(mxret), mxret);
-                return -1;
-        }
-
-        if (ni->ni_interfaces[0] != NULL) {
-                /* Use the IPoMX interface specified in 'networks=' */
-
-                CLASSERT (LNET_MAX_INTERFACES > 1);
-                if (ni->ni_interfaces[1] != NULL) {
-                        CERROR("Multiple interfaces not supported\n");
-                        goto failed_with_init;
-                }
-
-                ifname = ni->ni_interfaces[0];
-        } else {
-                ifname = *kmxlnd_tunables.kmx_default_ipif;
-        }
-
-        ret = libcfs_ipif_query(ifname, &if_up, &ip, &netmask);
-        if (ret != 0) {
-                CERROR("Can't query IPoMX interface %s: %d\n",
-                       ifname, ret);
-                goto failed_with_init;
-        }
-
-        if (!if_up) {
-                CERROR("Can't query IPoMX interface %s: it's down\n",
-                       ifname);
-                goto failed_with_init;
-        }
-
-        mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC,
-                                 NULL, 0, &kmxlnd_data.kmx_endpt);
-        if (mxret != MX_SUCCESS) {
-                CERROR("mx_open_endpoint() failed with %d\n", mxret);
-                goto failed_with_init;
-        }
-
-       mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &kmxlnd_data.kmx_epa);
-       mx_decompose_endpoint_addr(kmxlnd_data.kmx_epa, &nic_id, &ep_id);
-       mxret = mx_connect(kmxlnd_data.kmx_endpt, nic_id, ep_id,
-                          MXLND_MSG_MAGIC,
-                          jiffies_to_msecs(MXLND_CONNECT_TIMEOUT),
-                          &kmxlnd_data.kmx_epa);
-       if (mxret != MX_SUCCESS) {
-               CNETERR("unable to connect to myself (%s)\n", mx_strerror(mxret));
-               goto failed_with_endpoint;
-       }
-
-        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
-        CDEBUG(D_NET, "My NID is 0x%llx\n", ni->ni_nid);
-
-        /* this will catch all unexpected receives. */
-        mxret = mx_register_unexp_handler(kmxlnd_data.kmx_endpt,
-                                          (mx_unexp_handler_t) mxlnd_unexpected_recv,
-                                          NULL);
-        if (mxret != MX_SUCCESS) {
-                CERROR("mx_register_unexp_callback() failed with %s\n",
-                         mx_strerror(mxret));
-                goto failed_with_endpoint;
-        }
-       mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL,
-                                      jiffies_to_msecs(MXLND_COMM_TIMEOUT));
-       if (mxret != MX_SUCCESS) {
-               CERROR("mx_set_request_timeout() failed with %s\n",
-                       mx_strerror(mxret));
-               goto failed_with_endpoint;
-       }
-        return 0;
-
-failed_with_endpoint:
-        mx_close_endpoint(kmxlnd_data.kmx_endpt);
-failed_with_init:
-        mx_finalize();
-        return -1;
-}
-
-
-/**
- * mxlnd_thread_start - spawn a kernel thread with this function
- * @fn - function pointer
- * @arg - pointer to the parameter data
- * @name - name of new thread
- *
- * Returns 0 on success and a negative value on failure
- */
-int
-mxlnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-       cfs_task *task;
-       int     i   = (int) ((long) arg);
-
-       atomic_inc(&kmxlnd_data.kmx_nthreads);
-       init_completion(&kmxlnd_data.kmx_completions[i]);
-
-       task = kthread_run(fn, arg, name);
-       if (IS_ERR(task)) {
-               CERROR("cfs_create_thread() failed with %d\n", PTR_ERR(task));
-               atomic_dec(&kmxlnd_data.kmx_nthreads);
-       }
-       return PTR_ERR(task);
-}
-
-/**
- * mxlnd_thread_stop - decrement thread counter
- *
- * The thread returns 0 when it detects shutdown.
- * We are simply decrementing the thread counter.
- */
-void
-mxlnd_thread_stop(long id)
-{
-       int     i       = (int) id;
-       atomic_dec (&kmxlnd_data.kmx_nthreads);
-       complete(&kmxlnd_data.kmx_completions[i]);
-}
-
-/**
- * mxlnd_shutdown - stop IO, clean up state
- * @ni - LNET interface handle
- *
- * No calls to the LND should be made after calling this function.
- */
-void
-mxlnd_shutdown (lnet_ni_t *ni)
-{
-       int     i               = 0;
-       int     nthreads        = MXLND_NDAEMONS + *kmxlnd_tunables.kmx_n_waitd;
-
-       LASSERT (ni == kmxlnd_data.kmx_ni);
-       LASSERT (ni->ni_data == &kmxlnd_data);
-       CDEBUG(D_NET, "in shutdown()\n");
-
-       CDEBUG(D_MALLOC, "before MXLND cleanup: libcfs_kmemory %d "
-                        "kmx_mem_used %ld\n", atomic_read(&libcfs_kmemory),
-                        kmxlnd_data.kmx_mem_used);
-
-
-       CDEBUG(D_NET, "setting shutdown = 1\n");
-       atomic_set(&kmxlnd_data.kmx_shutdown, 1);
-
-       switch (kmxlnd_data.kmx_init) {
-
-        case MXLND_INIT_ALL:
-
-                /* calls write_[un]lock(kmx_global_lock) */
-                mxlnd_del_peer(LNET_NID_ANY);
-
-               /* wakeup request_waitds */
-               mx_wakeup(kmxlnd_data.kmx_endpt);
-               up(&kmxlnd_data.kmx_tx_queue_sem);
-               up(&kmxlnd_data.kmx_conn_sem);
-               mxlnd_sleep(msecs_to_jiffies(2 * MSEC_PER_SEC));
-
-                /* fall through */
-
-        case MXLND_INIT_THREADS:
-
-               CDEBUG(D_NET, "waiting on threads\n");
-               /* wait for threads to complete */
-               for (i = 0; i < nthreads; i++) {
-                       wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-               }
-               LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-
-                CDEBUG(D_NET, "freeing completions\n");
-                MXLND_FREE(kmxlnd_data.kmx_completions,
-                           nthreads * sizeof(struct completion));
-
-                /* fall through */
-
-        case MXLND_INIT_MX:
-
-                CDEBUG(D_NET, "stopping mx\n");
-
-                /* no peers left, close the endpoint */
-                mx_close_endpoint(kmxlnd_data.kmx_endpt);
-                mx_finalize();
-
-                /* fall through */
-
-        case MXLND_INIT_TXS:
-
-                CDEBUG(D_NET, "freeing txs\n");
-
-                /* free all txs and associated pages */
-                mxlnd_free_txs();
-
-                /* fall through */
-
-        case MXLND_INIT_DATA:
-
-                CDEBUG(D_NET, "freeing peers\n");
-
-                /* peers should be gone, but check again */
-                mxlnd_free_peers();
-
-                /* conn zombies should be gone, but check again */
-                mxlnd_free_conn_zombies();
-
-                /* fall through */
-
-        case MXLND_INIT_NOTHING:
-                break;
-        }
-        CDEBUG(D_NET, "shutdown complete\n");
-
-       CDEBUG(D_MALLOC, "after MXLND cleanup: libcfs_kmemory %d "
-                        "kmx_mem_used %ld\n", atomic_read(&libcfs_kmemory),
-                        kmxlnd_data.kmx_mem_used);
-
-       kmxlnd_data.kmx_init = MXLND_INIT_NOTHING;
-       module_put(THIS_MODULE);
-       return;
-}
-
-/**
- * mxlnd_startup - initialize state, open an endpoint, start IO
- * @ni - LNET interface handle
- *
- * Initialize state, open an endpoint, start monitoring threads.
- * Should only be called once.
- */
-int
-mxlnd_startup (lnet_ni_t *ni)
-{
-        int             i               = 0;
-        int             ret             = 0;
-        int             nthreads        = MXLND_NDAEMONS /* tx_queued, timeoutd, connd */
-                                          + *kmxlnd_tunables.kmx_n_waitd;
-        struct timeval  tv;
-
-        LASSERT (ni->ni_lnd == &the_kmxlnd);
-
-        if (kmxlnd_data.kmx_init != MXLND_INIT_NOTHING) {
-                CERROR("Only 1 instance supported\n");
-                return -EPERM;
-        }
-       CDEBUG(D_MALLOC, "before MXLND startup: libcfs_kmemory %d "
-                        "kmx_mem_used %ld\n", atomic_read(&libcfs_kmemory),
-                        kmxlnd_data.kmx_mem_used);
-
-        ni->ni_maxtxcredits = MXLND_TX_MSGS();
-        ni->ni_peertxcredits = *kmxlnd_tunables.kmx_peercredits;
-        if (ni->ni_maxtxcredits < ni->ni_peertxcredits)
-                ni->ni_maxtxcredits = ni->ni_peertxcredits;
-
-       try_module_get(THIS_MODULE);
-       memset (&kmxlnd_data, 0, sizeof (kmxlnd_data));
-
-        kmxlnd_data.kmx_ni = ni;
-        ni->ni_data = &kmxlnd_data;
-
-       do_gettimeofday(&tv);
-       kmxlnd_data.kmx_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-       CDEBUG(D_NET, "my incarnation is %llu\n", kmxlnd_data.kmx_incarnation);
-
-       rwlock_init (&kmxlnd_data.kmx_global_lock);
-       spin_lock_init (&kmxlnd_data.kmx_mem_lock);
-
-        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_reqs);
-        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_zombies);
-        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_orphan_msgs);
-       spin_lock_init (&kmxlnd_data.kmx_conn_lock);
-       sema_init(&kmxlnd_data.kmx_conn_sem, 0);
-
-        for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]);
-        }
-
-        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle);
-       spin_lock_init (&kmxlnd_data.kmx_tx_idle_lock);
-       kmxlnd_data.kmx_tx_next_cookie = 1;
-       CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_queue);
-       spin_lock_init (&kmxlnd_data.kmx_tx_queue_lock);
-       sema_init(&kmxlnd_data.kmx_tx_queue_sem, 0);
-
-        kmxlnd_data.kmx_init = MXLND_INIT_DATA;
-        /*****************************************************/
-
-        ret = mxlnd_init_txs();
-        if (ret != 0) {
-                CERROR("Can't alloc tx descs: %d\n", ret);
-                goto failed;
-        }
-        kmxlnd_data.kmx_init = MXLND_INIT_TXS;
-        /*****************************************************/
-
-        ret = mxlnd_init_mx(ni);
-        if (ret != 0) {
-                CERROR("Can't init mx\n");
-                goto failed;
-        }
-
-        kmxlnd_data.kmx_init = MXLND_INIT_MX;
-        /*****************************************************/
-
-        /* start threads */
-
-        MXLND_ALLOC(kmxlnd_data.kmx_completions,
-                    nthreads * sizeof(struct completion));
-        if (kmxlnd_data.kmx_completions == NULL) {
-                CERROR("failed to alloc kmxlnd_data.kmx_completions\n");
-                goto failed;
-        }
-        memset(kmxlnd_data.kmx_completions, 0,
-              nthreads * sizeof(struct completion));
-
-        CDEBUG(D_NET, "using %d %s in mx_wait_any()\n",
-                *kmxlnd_tunables.kmx_n_waitd,
-                *kmxlnd_tunables.kmx_n_waitd == 1 ? "thread" : "threads");
-
-        for (i = 0; i < *kmxlnd_tunables.kmx_n_waitd; i++) {
-               char                    name[24];
-               memset(name, 0, sizeof(name));
-               snprintf(name, sizeof(name), "mxlnd_request_waitd_%02ld", i);
-                ret = mxlnd_thread_start(mxlnd_request_waitd, (void*)((long)i));
-               if (ret < 0) {
-                       CERROR("Starting mxlnd_request_waitd[%d] "
-                               "failed with %d\n", i, ret);
-                       atomic_set(&kmxlnd_data.kmx_shutdown, 1);
-                       mx_wakeup(kmxlnd_data.kmx_endpt);
-                       for (--i; i >= 0; i--) {
-                               wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-                       }
-                       LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-                       MXLND_FREE(kmxlnd_data.kmx_completions,
-                               nthreads * sizeof(struct completion));
-
-                       goto failed;
-               }
-       }
-       ret = mxlnd_thread_start(mxlnd_tx_queued, (void *)((long)i++),
-                                "mxlnd_tx_queued");
-       if (ret < 0) {
-               CERROR("Starting mxlnd_tx_queued failed with %d\n", ret);
-               atomic_set(&kmxlnd_data.kmx_shutdown, 1);
-               mx_wakeup(kmxlnd_data.kmx_endpt);
-               for (--i; i >= 0; i--) {
-                       wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-               }
-               LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-               MXLND_FREE(kmxlnd_data.kmx_completions,
-                       nthreads * sizeof(struct completion));
-               goto failed;
-       }
-       ret = mxlnd_thread_start(mxlnd_timeoutd, (void *)((long)i++),
-                                "mxlnd_timeoutd");
-       if (ret < 0) {
-               CERROR("Starting mxlnd_timeoutd failed with %d\n", ret);
-               atomic_set(&kmxlnd_data.kmx_shutdown, 1);
-               mx_wakeup(kmxlnd_data.kmx_endpt);
-               up(&kmxlnd_data.kmx_tx_queue_sem);
-               for (--i; i >= 0; i--) {
-                       wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-               }
-               LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-               MXLND_FREE(kmxlnd_data.kmx_completions,
-                       nthreads * sizeof(struct completion));
-               goto failed;
-       }
-       ret = mxlnd_thread_start(mxlnd_connd, (void *)((long)i++),
-                                "mxlnd_connd");
-       if (ret < 0) {
-               CERROR("Starting mxlnd_connd failed with %d\n", ret);
-               atomic_set(&kmxlnd_data.kmx_shutdown, 1);
-               mx_wakeup(kmxlnd_data.kmx_endpt);
-               up(&kmxlnd_data.kmx_tx_queue_sem);
-               for (--i; i >= 0; i--) {
-                       wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-               }
-               LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-               MXLND_FREE(kmxlnd_data.kmx_completions,
-                       nthreads * sizeof(struct completion));
-               goto failed;
-       }
-
-        kmxlnd_data.kmx_init = MXLND_INIT_THREADS;
-        /*****************************************************/
-
-        kmxlnd_data.kmx_init = MXLND_INIT_ALL;
-        CDEBUG(D_MALLOC, "startup complete (kmx_mem_used %ld)\n", kmxlnd_data.kmx_mem_used);
-
-        return 0;
-failed:
-        CERROR("mxlnd_startup failed\n");
-        mxlnd_shutdown(ni);
-        return (-ENETDOWN);
-}
-
-static int mxlnd_init(void)
-{
-        lnet_register_lnd(&the_kmxlnd);
-        return 0;
-}
-
-static void mxlnd_exit(void)
-{
-        lnet_unregister_lnd(&the_kmxlnd);
-        return;
-}
-
-module_init(mxlnd_init);
-module_exit(mxlnd_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Myricom, Inc. - help@myri.com");
-MODULE_DESCRIPTION("Kernel MyrinetExpress LND");
-MODULE_VERSION("0.6.0");
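Each of the removed drivers plugged into LNet through the same small lnd_t dispatch table and registered it at module load time; the following condensed sketch, distilled from the deleted mxlnd.c above, shows that pattern (no new API; some hooks such as .lnd_ctl are omitted):

    /* distilled from the deleted mxlnd.c: how an LND registers with LNet */
    lnd_t the_kmxlnd = {
            .lnd_type     = MXLND,          /* network type this LND serves */
            .lnd_startup  = mxlnd_startup,  /* open the MX endpoint, start threads */
            .lnd_shutdown = mxlnd_shutdown, /* stop IO and free all state */
            .lnd_send     = mxlnd_send,
            .lnd_recv     = mxlnd_recv,
    };

    static int mxlnd_init(void)
    {
            lnet_register_lnd(&the_kmxlnd);   /* make the driver available to LNet */
            return 0;
    }

    static void mxlnd_exit(void)
    {
            lnet_unregister_lnd(&the_kmxlnd); /* detach before the module unloads */
    }

    module_init(mxlnd_init);
    module_exit(mxlnd_exit);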
diff --git a/lnet/klnds/mxlnd/mxlnd.h b/lnet/klnds/mxlnd/mxlnd.h
deleted file mode 100644
index 43d5c13..0000000
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- *
- * Copyright (C) 2006 Myricom, Inc.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/mxlnd/mxlnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Scott Atchley <atchley at myri.com>
- */
-
-#include <linux/module.h>       /* module */
-#include <linux/kernel.h>       /* module */
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-#include <linux/fs.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>         /* module */
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-#include <linux/utsname.h>
-#include <linux/jiffies.h>      /* msecs_to_jiffies */
-#include <asm/semaphore.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#include <asm/byteorder.h>      /* __LITTLE_ENDIAN */
-#include <net/arp.h>            /* arp table */
-#include <linux/netdevice.h>    /* get_device_by_name */
-#include <linux/inetdevice.h>   /* neigh_lookup, etc. */
-#include <linux/net.h>          /* sock_create_kern, kernel_connect, sock_release */
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include "libcfs/libcfs.h"
-#include "lnet/lnet.h"
-#include "lnet/lib-lnet.h"
-
-#define MX_KERNEL 1
-#include "mx_extensions.h"
-#include "myriexpress.h"
-
-#if LNET_MAX_IOV > MX_MAX_SEGMENTS
-    #error LNET_MAX_IOV is greater then MX_MAX_SEGMENTS
-#endif
-
-#define MXLND_MSG_MAGIC         0x4d583130              /* unique magic 'MX10' */
-#define MXLND_MSG_VERSION       0x03
-
-/* Using MX's 64 match bits
- * We are using the match bits to specify message type and the cookie.  The
- * highest four bits (60-63) are reserved for message type. Below we specify
- * the types. We reserve the remaining combinations for future use.  The next 8
- * bits (52-59) are reserved for returning a status code for failed GET_DATA
- * (payload) messages. The last 52 bits are used for cookies. That should allow
- * unique cookies for 4 KB messages at 10 Gbps line rate without rollover for
- * about 8 years. That should be enough. */
-
-#define MXLND_MSG_OFFSET        60      /* msg type offset */
-#define MXLND_MSG_BITS          (64 - MXLND_MSG_OFFSET)
-#define MXLND_MSG_MASK          (((1ULL<<MXLND_MSG_BITS) - 1) << MXLND_MSG_OFFSET)
-#define MXLND_MSG_TYPE(x)       (((x) & MXLND_MSG_MASK) >> MXLND_MSG_OFFSET)
-
-#define MXLND_ERROR_OFFSET      52      /* error value offset */
-#define MXLND_ERROR_BITS        (MXLND_MSG_OFFSET - MXLND_ERROR_OFFSET)
-#define MXLND_ERROR_MASK        (((1ULL<<MXLND_ERROR_BITS) - 1) << MXLND_ERROR_OFFSET)
-#define MXLND_ERROR_VAL(x)      (((x) & MXLND_ERROR_MASK) >> MXLND_ERROR_OFFSET)
-
-/* message types */
-#define MXLND_MSG_ICON_REQ      0xb     /* mx_iconnect() before CONN_REQ */
-#define MXLND_MSG_CONN_REQ      0xc     /* connection request */
-#define MXLND_MSG_ICON_ACK      0x9     /* mx_iconnect() before CONN_ACK */
-#define MXLND_MSG_CONN_ACK      0xa     /* connection request response */
-#define MXLND_MSG_BYE           0xd     /* disconnect msg */
-#define MXLND_MSG_EAGER         0xe     /* eager message */
-#define MXLND_MSG_NOOP          0x1     /* no msg, return credits */
-#define MXLND_MSG_PUT_REQ       0x2     /* put request src->sink */
-#define MXLND_MSG_PUT_ACK       0x3     /* put ack     src<-sink */
-#define MXLND_MSG_PUT_DATA      0x4     /* put payload src->sink */
-#define MXLND_MSG_GET_REQ       0x5     /* get request sink->src */
-#define MXLND_MSG_GET_DATA      0x6     /* get payload sink<-src */
-
-/* when to roll-over the cookie value */
-#define MXLND_MAX_COOKIE        ((1ULL << MXLND_ERROR_OFFSET) - 1)
-
-/* defaults for configurable parameters */
-#define MXLND_N_SCHED           1               /* # schedulers (mx_wait_any() threads) */
-#define MXLND_NDAEMONS          3               /* connd, timeoutd, tx_queued */
-#define MXLND_MX_BOARD          0               /* Use the first MX NIC if more than 1 avail */
-#define MXLND_MX_EP_ID          0               /* MX endpoint ID */
-/* timeout for send/recv (jiffies) */
-#define MXLND_COMM_TIMEOUT     msecs_to_jiffies(20 * MSEC_PER_SEC)
-/* timeout for wait (jiffies) */
-#define MXLND_WAIT_TIMEOUT     msecs_to_jiffies(MSEC_PER_SEC)
-/* timeout for connections (jiffies) */
-#define MXLND_CONNECT_TIMEOUT  msecs_to_jiffies(5 * MSEC_PER_SEC)
-#define MXLND_POLLING           1000            /* poll iterations before blocking */
-#define MXLND_LOOKUP_COUNT      5               /* how many times to try to resolve MAC */
-#define MXLND_MAX_PEERS         1024            /* number of nodes talking to me */
-
-#define MXLND_MSG_SIZE          (4<<10)         /* pre-posted eager message size */
-#define MXLND_MSG_QUEUE_DEPTH   8               /* default msg queue depth */
-#define MXLND_NTX               256             /* default # of tx msg descriptors */
-
-#define MXLND_HASH_BITS         6               /* the number of bits to hash over */
-#define MXLND_HASH_SIZE         (1<<MXLND_HASH_BITS)
-                                                /* number of peer lists for lookup.
-                                                   we hash over the last N bits of
-                                                   the IP address converted to an int. */
-#define MXLND_HASH_MASK         (MXLND_HASH_SIZE - 1)
-                                                /* ensure we use only the last N bits */
-
-/* derived constants... */
-/* TX messages (shared by all connections) */
-#define MXLND_TX_MSGS()       (*kmxlnd_tunables.kmx_ntx)
-#define MXLND_TX_MSG_BYTES()  (MXLND_TX_MSGS() * MXLND_MSG_SIZE)
-#define MXLND_TX_MSG_PAGES()  ((MXLND_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* RX messages (per connection) */
-#define MXLND_RX_MSGS()       (*kmxlnd_tunables.kmx_peercredits)
-#define MXLND_RX_MSG_BYTES()  (MXLND_RX_MSGS() * MXLND_MSG_SIZE)
-#define MXLND_RX_MSG_PAGES()  ((MXLND_RX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-#define MXLND_CREDIT_HIGHWATER() (*kmxlnd_tunables.kmx_peercredits - 2)
-                                                /* when to send a noop to return credits */
-
-/* debugging features */
-#define MXLND_CKSUM             0       /* checksum kmx_msg_t */
-#define MXLND_DEBUG             0       /* additional CDEBUG messages */
-
-/* provide wrappers around LIBCFS_ALLOC/FREE to keep MXLND specific
- * memory usage stats that include pages */
-
-#define MXLND_ALLOC(x, size)                                   \
-       do {                                                    \
-               spin_lock(&kmxlnd_data.kmx_mem_lock);           \
-               kmxlnd_data.kmx_mem_used += size;               \
-               spin_unlock(&kmxlnd_data.kmx_mem_lock);         \
-               LIBCFS_ALLOC(x, size);                          \
-               if (unlikely(x == NULL)) {                      \
-                       spin_lock(&kmxlnd_data.kmx_mem_lock);   \
-                       kmxlnd_data.kmx_mem_used -= size;       \
-                       spin_unlock(&kmxlnd_data.kmx_mem_lock); \
-               }                                               \
-       } while (0)
-
-#define MXLND_FREE(x, size)                                    \
-       do {                                                    \
-               spin_lock(&kmxlnd_data.kmx_mem_lock);           \
-               kmxlnd_data.kmx_mem_used -= size;               \
-               spin_unlock(&kmxlnd_data.kmx_mem_lock);         \
-               LIBCFS_FREE(x, size);                           \
-       } while (0)
-
-
-typedef struct kmx_tunables
-{
-        int     *kmx_n_waitd;           /* # completion threads */
-        int     *kmx_max_peers;         /* max # of potential peers */
-        int     *kmx_cksum;             /* checksum small msgs? */
-        int     *kmx_ntx;               /* total # of tx */
-        int     *kmx_credits;           /* concurrent sends to all peers */
-        int     *kmx_peercredits;       /* concurrent sends to 1 peer */
-        int     *kmx_board;             /* MX board (NIC) number */
-        int     *kmx_ep_id;             /* MX endpoint number */
-        char    **kmx_default_ipif;     /* IPoMX interface name */
-        int     *kmx_polling;           /* if 0, block. if > 0, poll this many
-                                           iterations before blocking */
-} kmx_tunables_t;
-
-typedef struct
-{
-        int               mxg_npages;   /* # pages */
-        struct page      *mxg_pages[0];
-} kmx_pages_t;
-
-/* global interface state */
-typedef struct kmx_data
-{
-       int                 kmx_init;           /* initialization state */
-       atomic_t            kmx_shutdown;       /* shutting down? */
-       atomic_t            kmx_nthreads;       /* number of threads */
-       struct completion   *kmx_completions;   /* array of completion struct */
-       lnet_ni_t           *kmx_ni;            /* the LND instance */
-       u64                 kmx_incarnation;    /* my incarnation value */
-       long                kmx_mem_used;       /* memory used */
-       mx_endpoint_t       kmx_endpt;          /* the MX endpoint */
-       mx_endpoint_addr_t  kmx_epa;            /* the MX endpoint address */
-
-       rwlock_t            kmx_global_lock;    /* global lock */
-       spinlock_t          kmx_mem_lock;       /* memory accounting lock */
-
-       cfs_list_t          kmx_conn_reqs;      /* list of connection reqs */
-       spinlock_t          kmx_conn_lock;      /* connection list lock */
-       struct semaphore    kmx_conn_sem;       /* connection request list */
-       cfs_list_t          kmx_conn_zombies;   /* list of zombie connections */
-       cfs_list_t          kmx_orphan_msgs;    /* list of txs to cancel */
-
-                                               /* list of all known peers */
-       cfs_list_t          kmx_peers[MXLND_HASH_SIZE];
-       atomic_t            kmx_npeers;         /* number of peers */
-
-       kmx_pages_t        *kmx_tx_pages;       /* tx msg pages */
-
-       struct kmx_ctx     *kmx_txs;            /* all tx descriptors */
-       cfs_list_t          kmx_tx_idle;        /* list of idle tx */
-       spinlock_t          kmx_tx_idle_lock;   /* lock for idle tx list */
-       s32                 kmx_tx_used;        /* txs in use */
-       u64                 kmx_tx_next_cookie; /* unique id for tx */
-       cfs_list_t          kmx_tx_queue;       /* generic send queue */
-       spinlock_t          kmx_tx_queue_lock;  /* lock for generic sends */
-       struct semaphore    kmx_tx_queue_sem;   /* semaphore for tx queue */
-} kmx_data_t;
-
-#define MXLND_INIT_NOTHING      0       /* in the beginning, there was nothing... */
-#define MXLND_INIT_DATA         1       /* main data structures created */
-#define MXLND_INIT_TXS          2       /* tx descriptors created */
-#define MXLND_INIT_MX           3       /* initiate MX library, open endpoint, get NIC id */
-#define MXLND_INIT_THREADS      4       /* waitd, timeoutd, tx_queued threads */
-#define MXLND_INIT_ALL          5       /* startup completed */
-
-/************************************************************************
- * MXLND Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-typedef struct kmx_connreq_msg
-{
-        u32             mxcrm_queue_depth;              /* per peer max messages in flight */
-        u32             mxcrm_eager_size;               /* size of preposted eager messages */
-} WIRE_ATTR kmx_connreq_msg_t;
-
-typedef struct kmx_eager_msg
-{
-        lnet_hdr_t      mxem_hdr;                       /* lnet header */
-        char            mxem_payload[0];                /* piggy-backed payload */
-} WIRE_ATTR kmx_eager_msg_t;
-
-typedef struct kmx_putreq_msg
-{
-        lnet_hdr_t      mxprm_hdr;                      /* lnet header */
-        u64             mxprm_cookie;                   /* opaque completion cookie */
-} WIRE_ATTR kmx_putreq_msg_t;
-
-typedef struct kmx_putack_msg
-{
-        u64             mxpam_src_cookie;               /* reflected completion cookie */
-        u64             mxpam_dst_cookie;               /* opaque completion cookie */
-} WIRE_ATTR kmx_putack_msg_t;
-
-typedef struct kmx_getreq_msg
-{
-        lnet_hdr_t      mxgrm_hdr;                      /* lnet header */
-        u64             mxgrm_cookie;                   /* opaque completion cookie */
-} WIRE_ATTR kmx_getreq_msg_t;
-
-typedef struct kmx_msg
-{
-        /* First two fields fixed for all time */
-        u32             mxm_magic;                      /* MXLND message */
-        u16             mxm_version;                    /* version number */
-
-        u8              mxm_type;                       /* message type */
-        u8              mxm_credits;                    /* returned credits */
-        u32             mxm_nob;                        /* # of bytes in whole message */
-        u32             mxm_cksum;                      /* checksum (0 == no checksum) */
-        u64             mxm_srcnid;                     /* sender's NID */
-        u64             mxm_srcstamp;                   /* sender's incarnation */
-        u64             mxm_dstnid;                     /* destination's NID */
-        u64             mxm_dststamp;                   /* destination's incarnation */
-
-        union {
-                kmx_connreq_msg_t       conn_req;
-                kmx_eager_msg_t         eager;
-                kmx_putreq_msg_t        put_req;
-                kmx_putack_msg_t        put_ack;
-                kmx_getreq_msg_t        get_req;
-        } WIRE_ATTR mxm_u;
-} WIRE_ATTR kmx_msg_t;
-
-/***********************************************************************/
-
-enum kmx_req_type {
-        MXLND_REQ_TX    = 0,
-        MXLND_REQ_RX    = 1,
-};
-
-/* The life cycle of a request */
-enum kmx_req_state {
-        MXLND_CTX_INIT       = 0,               /* just created */
-        MXLND_CTX_IDLE       = 1,               /* available for use */
-        MXLND_CTX_PREP       = 2,               /* getting ready for send/recv */
-        MXLND_CTX_PENDING    = 3,               /* mx_isend() or mx_irecv() called */
-        MXLND_CTX_COMPLETED  = 4,               /* cleaning up after completion or timeout */
-        MXLND_CTX_CANCELED   = 5,               /* timed out but still in ctx list */
-};
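
Concretely, a descriptor is claimed from an idle list (IDLE -> PREP), posted to MX (PREP -> PENDING), and either completes or is timed out by mxlnd_timeoutd() (PENDING -> COMPLETED or CANCELED) before being recycled. A stand-alone walk through that progression, using a local copy of the enum rather than the driver's structures:

#include <stdio.h>

enum ex_req_state {
        EX_CTX_INIT      = 0,
        EX_CTX_IDLE      = 1,
        EX_CTX_PREP      = 2,
        EX_CTX_PENDING   = 3,
        EX_CTX_COMPLETED = 4,
        EX_CTX_CANCELED  = 5,
};

/* Forward path of a request; CANCELED is the timeout branch out of PENDING. */
static enum ex_req_state ex_next_state(enum ex_req_state s, int timed_out)
{
        switch (s) {
        case EX_CTX_INIT:    return EX_CTX_IDLE;
        case EX_CTX_IDLE:    return EX_CTX_PREP;
        case EX_CTX_PREP:    return EX_CTX_PENDING;
        case EX_CTX_PENDING: return timed_out ? EX_CTX_CANCELED : EX_CTX_COMPLETED;
        default:             return EX_CTX_IDLE;       /* recycled */
        }
}

int main(void)
{
        enum ex_req_state s = EX_CTX_INIT;
        int i;

        for (i = 0; i < 6; i++) {
                printf("state %d\n", (int)s);
                s = ex_next_state(s, 0);
        }
        return 0;
}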
-
-/* Context Structure - generic tx/rx descriptor
- * It represents the context (or state) of each send or receive request.
- * Other LNDs keep separate TX and RX descriptors; this structure replaces both.
- *
- * The txs live on the global kmx_txs array for cleanup. The rxs are managed
- * per struct kmx_conn. We will move them between the rx/tx idle lists and the
- * pending list which is monitored by mxlnd_timeoutd().
- */
-typedef struct kmx_ctx
-{
-        enum kmx_req_type   mxc_type;           /* TX or RX */
-        u64                 mxc_incarnation;    /* store the peer's incarnation here
-                                                   to verify before changing flow
-                                                   control credits after completion */
-        unsigned long       mxc_deadline;       /* request time out in absolute jiffies */
-        enum kmx_req_state  mxc_state;          /* what is the state of the request? */
-        cfs_list_t          mxc_list;           /* place on rx/tx idle list, tx q, peer tx */
-        cfs_list_t          mxc_rx_list;        /* place on mxp_rx_posted list */
-
-        lnet_nid_t          mxc_nid;            /* dst's NID if peer is not known */
-        struct kmx_peer    *mxc_peer;           /* owning peer */
-        struct kmx_conn    *mxc_conn;           /* owning conn */
-        kmx_msg_t          *mxc_msg;            /* msg hdr mapped to mxc_page */
-        lnet_msg_t         *mxc_lntmsg[2];      /* lnet msgs to finalize */
-
-        u8                  mxc_msg_type;       /* what type of message is this? */
-        u64                 mxc_cookie;         /* completion cookie */
-        u64                 mxc_match;          /* MX match info */
-        mx_ksegment_t       mxc_seg;            /* local MX ksegment for non-DATA */
-        mx_ksegment_t      *mxc_seg_list;       /* MX ksegment array for DATA */
-        int                 mxc_nseg;           /* number of segments */
-        unsigned long       mxc_pin_type;       /* MX_PIN_PHYSICAL [| MX_PIN_FULLPAGES] */
-        u32                 mxc_nob;            /* number of bytes sent/received */
-        mx_request_t        mxc_mxreq;          /* MX request */
-        mx_status_t         mxc_status;         /* MX status */
-        u32                 mxc_errno;          /* errno for LNET */
-        u64                 mxc_get;            /* # of times returned from idle list */
-        u64                 mxc_put;            /* # of times returned from idle list */
-} kmx_ctx_t;
-
-#define MXLND_CONN_DISCONNECT  -2       /* conn is being destroyed - do not add txs */
-#define MXLND_CONN_FAIL        -1       /* connect failed (bad handshake, unavail, etc.) */
-#define MXLND_CONN_INIT         0       /* in the beginning, there was nothing... */
-#define MXLND_CONN_REQ          1       /* a connection request message is needed */
-#define MXLND_CONN_ACK          2       /* a connection ack is needed */
-#define MXLND_CONN_WAIT         3       /* waiting for req or ack to complete */
-#define MXLND_CONN_READY        4       /* ready to send */
-
-/* store all data from an unexpected CONN_[REQ|ACK] receive */
-typedef struct kmx_connparams
-{
-        cfs_list_t              mxr_list;       /* list to hang on kmx_conn_reqs */
-        void                   *mxr_context;    /* context - unused - will hold net */
-        mx_endpoint_addr_t      mxr_epa;        /* the peer's epa */
-        u64                     mxr_match;      /* the CONN_REQ's match bits */
-        u32                     mxr_nob;        /* length of CONN_REQ message */
-        struct kmx_peer        *mxr_peer;       /* peer if known */
-        struct kmx_conn        *mxr_conn;       /* conn if known */
-        kmx_msg_t               mxr_msg;        /* the msg header & connreq_msg_t */
-} kmx_connparams_t;
-
-/* connection state - queues for queued and pending msgs */
-typedef struct kmx_conn
-{
-        struct kmx_peer    *mxk_peer;           /* owning peer */
-        cfs_list_t          mxk_list;           /* for placing on mxp_conns */
-        cfs_list_t          mxk_zombie;         /* for placing on zombies list */
-       u64                 mxk_incarnation;    /* connection's incarnation value */
-        u32                 mxk_sid;            /* peer's MX session id */
-       atomic_t        mxk_refcount;       /* reference counting */
-        int                 mxk_status;         /* can we send messages? MXLND_CONN_* */
-
-        mx_endpoint_addr_t  mxk_epa;            /* peer's endpoint address */
-
-       spinlock_t          mxk_lock;           /* lock */
-        unsigned long       mxk_timeout;        /* expiration of oldest pending tx/rx */
-        unsigned long       mxk_last_tx;        /* when last tx completed with success */
-        unsigned long       mxk_last_rx;        /* when last rx completed */
-
-        kmx_pages_t        *mxk_rx_pages;       /* rx msg pages */
-        kmx_ctx_t          *mxk_rxs;            /* the rx descriptors */
-        cfs_list_t          mxk_rx_idle;        /* list of idle rx */
-
-        int                 mxk_credits;        /* # of my credits for sending to peer */
-        int                 mxk_outstanding;    /* # of credits to return */
-
-        cfs_list_t          mxk_tx_credit_queue; /* send queue for peer */
-        cfs_list_t          mxk_tx_free_queue;  /* send queue for peer */
-        int                 mxk_ntx_msgs;       /* # of msgs on tx queues */
-        int                 mxk_ntx_data;       /* # of DATA on tx queues */
-        int                 mxk_ntx_posted;     /* # of tx msgs in flight */
-        int                 mxk_data_posted;    /* # of tx data payloads in flight */
-
-        cfs_list_t          mxk_pending;        /* in flight rxs and txs */
-} kmx_conn_t;
-
-/* peer state */
-typedef struct kmx_peer
-{
-        cfs_list_t          mxp_list;           /* for placing on kmx_peers */
-        lnet_nid_t          mxp_nid;            /* peer's LNET NID */
-        lnet_ni_t          *mxp_ni;             /* LNET interface */
-       atomic_t        mxp_refcount;       /* reference counts */
-
-        cfs_list_t          mxp_conns;          /* list of connections */
-        kmx_conn_t         *mxp_conn;           /* current connection */
-        cfs_list_t          mxp_tx_queue;       /* msgs waiting for a conn */
-
-        u32                 mxp_board;          /* peer's board rank */
-        u32                 mxp_ep_id;          /* peer's MX endpoint ID */
-        u64                 mxp_nic_id;         /* remote's MX nic_id for mx_connect() */
-
-        unsigned long       mxp_reconnect_time; /* when to retry connect */
-        int                 mxp_incompatible;   /* incorrect conn_req values */
-} kmx_peer_t;
-
-extern kmx_data_t       kmxlnd_data;
-extern kmx_tunables_t   kmxlnd_tunables;
-
-/* required for the LNET API */
-int  mxlnd_startup(lnet_ni_t *ni);
-void mxlnd_shutdown(lnet_ni_t *ni);
-int  mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int  mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int  mxlnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
-                unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
-                unsigned int offset, unsigned int mlen, unsigned int rlen);
-
-/* in mxlnd.c */
-extern void mxlnd_thread_stop(long id);
-extern void mxlnd_ctx_init(kmx_ctx_t *ctx);
-extern int  mxlnd_peer_alloc(kmx_peer_t **peerp, lnet_nid_t nid,
-                u32 board, u32 ep_id, u64 nic_id);
-extern int mxlnd_alloc_pages(kmx_pages_t **pp, int npages);
-extern void mxlnd_free_pages(kmx_pages_t *p);
-
-/* in mxlnd_cb.c */
-void mxlnd_eager_recv(void *context, uint64_t match_value, uint32_t length);
-extern mx_unexp_handler_action_t mxlnd_unexpected_recv(void *context,
-                mx_endpoint_addr_t source, uint64_t match_value, uint32_t length,
-                void *data_if_available);
-extern void mxlnd_peer_free(kmx_peer_t *peer);
-extern void mxlnd_conn_free_locked(kmx_conn_t *conn);
-extern void mxlnd_conn_disconnect(kmx_conn_t *conn, int mx_dis, int send_bye);
-extern int mxlnd_close_matching_conns(lnet_nid_t nid);
-extern void mxlnd_sleep(unsigned long timeout);
-extern int  mxlnd_tx_queued(void *arg);
-extern void mxlnd_handle_rx_completion(kmx_ctx_t *rx);
-extern int  mxlnd_check_sends(kmx_peer_t *peer);
-extern int  mxlnd_tx_peer_queued(void *arg);
-extern int  mxlnd_request_waitd(void *arg);
-extern int  mxlnd_unex_recvd(void *arg);
-extern int  mxlnd_timeoutd(void *arg);
-extern int mxlnd_free_conn_zombies(void);
-extern int  mxlnd_connd(void *arg);
-extern int mxlnd_del_peer(lnet_nid_t nid);
-
-
-/**
- * mxlnd_nid_to_hash - hash the nid
- * @nid - LNET NID
- *
- * Takes the u64 nid and XORs the lowest N bits with the next lowest N bits.
- */
-static inline int
-mxlnd_nid_to_hash(lnet_nid_t nid)
-{
-        return (nid & MXLND_HASH_MASK) ^
-               ((nid & (MXLND_HASH_MASK << MXLND_HASH_BITS)) >> MXLND_HASH_BITS);
-}
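
The fold-XOR above yields an index into the kmx_peers[] buckets from the low bits of the NID. MXLND_HASH_BITS and MXLND_HASH_MASK are defined elsewhere in mxlnd.h; the stand-alone sketch below reproduces the computation with an assumed 7-bit table purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Assumed values for illustration only; the driver defines its own
 * MXLND_HASH_BITS/MXLND_HASH_MASK elsewhere in mxlnd.h. */
#define EX_HASH_BITS   7
#define EX_HASH_SIZE   (1 << EX_HASH_BITS)
#define EX_HASH_MASK   (EX_HASH_SIZE - 1)

static int ex_nid_to_hash(uint64_t nid)
{
        return (int)((nid & EX_HASH_MASK) ^
                     ((nid & ((uint64_t)EX_HASH_MASK << EX_HASH_BITS)) >>
                      EX_HASH_BITS));
}

int main(void)
{
        uint64_t nid = 0x20000c0a80102ULL;      /* arbitrary example value */

        /* In the driver the result indexes a bucket such as kmx_peers[hash]. */
        printf("bucket = %d (of %d)\n", ex_nid_to_hash(nid), EX_HASH_SIZE);
        return 0;
}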
-
-
-#define mxlnd_peer_addref(peer)                                 \
-do {                                                            \
-       LASSERT(peer != NULL);                                  \
-       LASSERT(atomic_read(&(peer)->mxp_refcount) > 0);        \
-       atomic_inc(&(peer)->mxp_refcount);                      \
-} while (0)
-
-
-#define mxlnd_peer_decref(peer)                                 \
-do {                                                            \
-       LASSERT(atomic_read(&(peer)->mxp_refcount) > 0);        \
-       if (atomic_dec_and_test(&(peer)->mxp_refcount))         \
-               mxlnd_peer_free(peer);                          \
-} while (0)
-
-#define mxlnd_conn_addref(conn)                                 \
-do {                                                            \
-       LASSERT(conn != NULL);                                  \
-       LASSERT(atomic_read(&(conn)->mxk_refcount) > 0);        \
-       atomic_inc(&(conn)->mxk_refcount);                      \
-} while (0)
-
-
-#define mxlnd_conn_decref(conn)                                                \
-do {                                                                   \
-       LASSERT(conn != NULL);                                          \
-       LASSERT(atomic_read(&(conn)->mxk_refcount) > 0);                \
-       if (atomic_dec_and_test(&(conn)->mxk_refcount)) {               \
-               spin_lock(&kmxlnd_data.kmx_conn_lock);                  \
-               LASSERT((conn)->mxk_status == MXLND_CONN_DISCONNECT);   \
-               CDEBUG(D_NET, "adding conn %p to zombies\n", (conn));   \
-               cfs_list_add_tail(&(conn)->mxk_zombie,                  \
-                                &kmxlnd_data.kmx_conn_zombies);        \
-               spin_unlock(&kmxlnd_data.kmx_conn_lock);                \
-               up(&kmxlnd_data.kmx_conn_sem);                          \
-       }                                                               \
-} while (0)
-
-#define mxlnd_valid_msg_type(type)                              \
-do {                                                            \
-        LASSERT((type) == MXLND_MSG_EAGER    ||                 \
-                (type) == MXLND_MSG_ICON_REQ ||                 \
-                (type) == MXLND_MSG_CONN_REQ ||                 \
-                (type) == MXLND_MSG_ICON_ACK ||                 \
-                (type) == MXLND_MSG_CONN_ACK ||                 \
-                (type) == MXLND_MSG_BYE      ||                 \
-                (type) == MXLND_MSG_NOOP     ||                 \
-                (type) == MXLND_MSG_PUT_REQ  ||                 \
-                (type) == MXLND_MSG_PUT_ACK  ||                 \
-                (type) == MXLND_MSG_PUT_DATA ||                 \
-                (type) == MXLND_MSG_GET_REQ  ||                 \
-                (type) == MXLND_MSG_GET_DATA);                  \
-} while (0)
diff --git a/lnet/klnds/mxlnd/mxlnd_cb.c b/lnet/klnds/mxlnd/mxlnd_cb.c
deleted file mode 100644 (file)
index d40d1c6..0000000
+++ /dev/null
@@ -1,4088 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- *
- * Copyright (C) 2006 Myricom, Inc.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/mxlnd/mxlnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Scott Atchley <atchley at myri.com>
- */
-
-#include <asm/page.h>
-#include "mxlnd.h"
-
-mx_endpoint_addr_t MX_EPA_NULL; /* use to determine if an endpoint is NULL */
-
-inline int
-mxlnd_endpoint_addr_null(mx_endpoint_addr_t epa)
-{
-        /* if memcmp() == 0, it is NULL */
-        return !(memcmp(&epa, &MX_EPA_NULL, sizeof(epa)));
-}
-
-char *
-mxlnd_ctxstate_to_str(int mxc_state)
-{
-        switch (mxc_state) {
-        case MXLND_CTX_INIT:
-                return "MXLND_CTX_INIT";
-        case MXLND_CTX_IDLE:
-                return "MXLND_CTX_IDLE";
-        case MXLND_CTX_PREP:
-                return "MXLND_CTX_PREP";
-        case MXLND_CTX_PENDING:
-                return "MXLND_CTX_PENDING";
-        case MXLND_CTX_COMPLETED:
-                return "MXLND_CTX_COMPLETED";
-        case MXLND_CTX_CANCELED:
-                return "MXLND_CTX_CANCELED";
-        default:
-                return "*unknown*";
-        }
-}
-
-char *
-mxlnd_connstatus_to_str(int mxk_status)
-{
-        switch (mxk_status) {
-        case MXLND_CONN_READY:
-                return "MXLND_CONN_READY";
-        case MXLND_CONN_INIT:
-                return "MXLND_CONN_INIT";
-        case MXLND_CONN_WAIT:
-                return "MXLND_CONN_WAIT";
-        case MXLND_CONN_DISCONNECT:
-                return "MXLND_CONN_DISCONNECT";
-        case MXLND_CONN_FAIL:
-                return "MXLND_CONN_FAIL";
-        default:
-                return "unknown";
-        }
-}
-
-char *
-mxlnd_msgtype_to_str(int type)
-{
-        switch (type) {
-        case MXLND_MSG_EAGER:
-                return "MXLND_MSG_EAGER";
-        case MXLND_MSG_CONN_REQ:
-                return "MXLND_MSG_CONN_REQ";
-        case MXLND_MSG_CONN_ACK:
-                return "MXLND_MSG_CONN_ACK";
-        case MXLND_MSG_BYE:
-                return "MXLND_MSG_BYE";
-        case MXLND_MSG_NOOP:
-                return "MXLND_MSG_NOOP";
-        case MXLND_MSG_PUT_REQ:
-                return "MXLND_MSG_PUT_REQ";
-        case MXLND_MSG_PUT_ACK:
-                return "MXLND_MSG_PUT_ACK";
-        case MXLND_MSG_PUT_DATA:
-                return "MXLND_MSG_PUT_DATA";
-        case MXLND_MSG_GET_REQ:
-                return "MXLND_MSG_GET_REQ";
-        case MXLND_MSG_GET_DATA:
-                return "MXLND_MSG_GET_DATA";
-        default:
-                return "unknown";
-        }
-}
-
-char *
-mxlnd_lnetmsg_to_str(int type)
-{
-        switch (type) {
-        case LNET_MSG_ACK:
-                return "LNET_MSG_ACK";
-        case LNET_MSG_PUT:
-                return "LNET_MSG_PUT";
-        case LNET_MSG_GET:
-                return "LNET_MSG_GET";
-        case LNET_MSG_REPLY:
-                return "LNET_MSG_REPLY";
-        case LNET_MSG_HELLO:
-                return "LNET_MSG_HELLO";
-        default:
-                LBUG();
-                return "*unknown*";
-        }
-}
-
-static inline u64
-mxlnd_create_match(kmx_ctx_t *ctx, u8 error)
-{
-        u64 type        = (u64) ctx->mxc_msg_type;
-        u64 err         = (u64) error;
-        u64 match       = 0ULL;
-
-        mxlnd_valid_msg_type(ctx->mxc_msg_type);
-        LASSERT(ctx->mxc_cookie >> MXLND_ERROR_OFFSET == 0);
-        match = (type << MXLND_MSG_OFFSET) | (err << MXLND_ERROR_OFFSET) | ctx->mxc_cookie;
-        return match;
-}
-
-static inline void
-mxlnd_parse_match(u64 match, u8 *msg_type, u8 *error, u64 *cookie)
-{
-        *msg_type = (u8) MXLND_MSG_TYPE(match);
-        *error    = (u8) MXLND_ERROR_VAL(match);
-        *cookie   = match & MXLND_MAX_COOKIE;
-        mxlnd_valid_msg_type(*msg_type);
-        return;
-}
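
mxlnd_create_match() and mxlnd_parse_match() pack the message type, an error value and the completion cookie into MX's 64-bit match bits. The real MXLND_MSG_OFFSET, MXLND_ERROR_OFFSET and MXLND_MAX_COOKIE are defined elsewhere in mxlnd.h; the round trip below assumes a 4-bit type / 8-bit error / 52-bit cookie split purely to show the idea:

#include <stdint.h>
#include <stdio.h>

/* Assumed layout for illustration; not the driver's real constants. */
#define EX_MSG_OFFSET    60
#define EX_ERROR_OFFSET  52
#define EX_MAX_COOKIE    ((1ULL << EX_ERROR_OFFSET) - 1)

static uint64_t ex_create_match(uint8_t type, uint8_t error, uint64_t cookie)
{
        return ((uint64_t)type << EX_MSG_OFFSET) |
               ((uint64_t)error << EX_ERROR_OFFSET) |
               (cookie & EX_MAX_COOKIE);
}

static void ex_parse_match(uint64_t match, uint8_t *type, uint8_t *error,
                           uint64_t *cookie)
{
        *type   = (uint8_t)(match >> EX_MSG_OFFSET);
        *error  = (uint8_t)((match >> EX_ERROR_OFFSET) & 0xff);
        *cookie = match & EX_MAX_COOKIE;
}

int main(void)
{
        uint8_t  type, error;
        uint64_t cookie;
        uint64_t match = ex_create_match(0x9, 0, 0x1234ULL); /* made-up values */

        ex_parse_match(match, &type, &error, &cookie);
        printf("type=%u error=%u cookie=0x%llx\n",
               type, error, (unsigned long long)cookie);
        return 0;
}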
-
-kmx_ctx_t *
-mxlnd_get_idle_rx(kmx_conn_t *conn)
-{
-        cfs_list_t              *rxs    = NULL;
-        kmx_ctx_t               *rx     = NULL;
-
-        LASSERT(conn != NULL);
-
-        rxs = &conn->mxk_rx_idle;
-
-       spin_lock(&conn->mxk_lock);
-
-       if (cfs_list_empty (rxs)) {
-               spin_unlock(&conn->mxk_lock);
-               return NULL;
-       }
-
-       rx = cfs_list_entry (rxs->next, kmx_ctx_t, mxc_list);
-       cfs_list_del_init(&rx->mxc_list);
-       spin_unlock(&conn->mxk_lock);
-
-#if MXLND_DEBUG
-        if (rx->mxc_get != rx->mxc_put) {
-                CNETERR("*** RX get (%llu) != put (%llu) ***\n", rx->mxc_get, rx->mxc_put);
-                CNETERR("*** incarnation= %lld ***\n", rx->mxc_incarnation);
-                CNETERR("*** deadline= %ld ***\n", rx->mxc_deadline);
-                CNETERR("*** state= %s ***\n", mxlnd_ctxstate_to_str(rx->mxc_state));
-                CNETERR("*** listed?= %d ***\n", !cfs_list_empty(&rx->mxc_list));
-                CNETERR("*** nid= 0x%llx ***\n", rx->mxc_nid);
-                CNETERR("*** peer= 0x%p ***\n", rx->mxc_peer);
-                CNETERR("*** msg_type= %s ***\n", mxlnd_msgtype_to_str(rx->mxc_msg_type));
-                CNETERR("*** cookie= 0x%llx ***\n", rx->mxc_cookie);
-                CNETERR("*** nob= %d ***\n", rx->mxc_nob);
-        }
-#endif
-        LASSERT (rx->mxc_get == rx->mxc_put);
-
-        rx->mxc_get++;
-
-        LASSERT (rx->mxc_state == MXLND_CTX_IDLE);
-        rx->mxc_state = MXLND_CTX_PREP;
-        rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT;
-
-        return rx;
-}
-
-int
-mxlnd_put_idle_rx(kmx_ctx_t *rx)
-{
-        kmx_conn_t              *conn   = rx->mxc_conn;
-        cfs_list_t              *rxs    = &conn->mxk_rx_idle;
-
-        LASSERT(rx->mxc_type == MXLND_REQ_RX);
-
-        mxlnd_ctx_init(rx);
-
-        rx->mxc_put++;
-        LASSERT(rx->mxc_get == rx->mxc_put);
-
-       spin_lock(&conn->mxk_lock);
-       cfs_list_add(&rx->mxc_list, rxs);
-       spin_unlock(&conn->mxk_lock);
-       return 0;
-}
-
-kmx_ctx_t *
-mxlnd_get_idle_tx(void)
-{
-       cfs_list_t              *tmp    = &kmxlnd_data.kmx_tx_idle;
-       kmx_ctx_t               *tx     = NULL;
-
-       spin_lock(&kmxlnd_data.kmx_tx_idle_lock);
-
-       if (cfs_list_empty (&kmxlnd_data.kmx_tx_idle)) {
-               CNETERR("%d txs in use\n", kmxlnd_data.kmx_tx_used);
-               spin_unlock(&kmxlnd_data.kmx_tx_idle_lock);
-               return NULL;
-       }
-
-        tmp = &kmxlnd_data.kmx_tx_idle;
-        tx = cfs_list_entry (tmp->next, kmx_ctx_t, mxc_list);
-        cfs_list_del_init(&tx->mxc_list);
-
-        /* Allocate a new completion cookie.  It might not be needed,
-         * but we've got a lock right now and we're unlikely to
-         * wrap... */
-        tx->mxc_cookie = kmxlnd_data.kmx_tx_next_cookie++;
-        if (kmxlnd_data.kmx_tx_next_cookie > MXLND_MAX_COOKIE) {
-                kmxlnd_data.kmx_tx_next_cookie = 1;
-        }
-        kmxlnd_data.kmx_tx_used++;
-       spin_unlock(&kmxlnd_data.kmx_tx_idle_lock);
-
-        LASSERT (tx->mxc_get == tx->mxc_put);
-
-        tx->mxc_get++;
-
-        LASSERT (tx->mxc_state == MXLND_CTX_IDLE);
-        LASSERT (tx->mxc_lntmsg[0] == NULL);
-        LASSERT (tx->mxc_lntmsg[1] == NULL);
-
-        tx->mxc_state = MXLND_CTX_PREP;
-        tx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT;
-
-        return tx;
-}
-
-void
-mxlnd_conn_disconnect(kmx_conn_t *conn, int mx_dis, int send_bye);
-
-int
-mxlnd_put_idle_tx(kmx_ctx_t *tx)
-{
-        int             result  = 0;
-        lnet_msg_t      *lntmsg[2];
-
-        LASSERT(tx->mxc_type == MXLND_REQ_TX);
-
-        if (tx->mxc_status.code != MX_STATUS_SUCCESS || tx->mxc_errno != 0) {
-                kmx_conn_t      *conn   = tx->mxc_conn;
-
-                result = -EIO;
-                if (tx->mxc_errno != 0) result = tx->mxc_errno;
-                /* FIXME should we set mx_dis? */
-                mxlnd_conn_disconnect(conn, 0, 1);
-        }
-
-        lntmsg[0] = tx->mxc_lntmsg[0];
-        lntmsg[1] = tx->mxc_lntmsg[1];
-
-        mxlnd_ctx_init(tx);
-
-        tx->mxc_put++;
-        LASSERT(tx->mxc_get == tx->mxc_put);
-
-       spin_lock(&kmxlnd_data.kmx_tx_idle_lock);
-       cfs_list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle);
-       kmxlnd_data.kmx_tx_used--;
-       spin_unlock(&kmxlnd_data.kmx_tx_idle_lock);
-
-       if (lntmsg[0] != NULL)
-               lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result);
-       if (lntmsg[1] != NULL)
-               lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result);
-       return 0;
-}
-
-
-void
-mxlnd_connparams_free(kmx_connparams_t *cp)
-{
-        LASSERT(cfs_list_empty(&cp->mxr_list));
-        MXLND_FREE(cp, sizeof(*cp));
-        return;
-}
-
-int
-mxlnd_connparams_alloc(kmx_connparams_t **cp, void *context,
-                            mx_endpoint_addr_t epa, u64 match, u32 length,
-                            kmx_conn_t *conn, kmx_peer_t *peer, void *data)
-{
-        kmx_connparams_t *c = NULL;
-
-        MXLND_ALLOC(c, sizeof(*c));
-        if (!c) return -ENOMEM;
-
-        CFS_INIT_LIST_HEAD(&c->mxr_list);
-        c->mxr_context = context;
-        c->mxr_epa = epa;
-        c->mxr_match = match;
-        c->mxr_nob = length;
-        c->mxr_conn = conn;
-        c->mxr_peer = peer;
-        c->mxr_msg = *((kmx_msg_t *) data);
-
-        *cp = c;
-        return 0;
-}
-
-static inline void
-mxlnd_set_conn_status(kmx_conn_t *conn, int status)
-{
-       conn->mxk_status = status;
-       smp_mb();
-}
-
-/**
- * mxlnd_conn_free_locked - free the conn
- * @conn - a kmx_conn pointer
- *
- * The calling function should remove the conn from the conns list first,
- * then destroy it. Caller should have write-locked kmx_global_lock.
- */
-void
-mxlnd_conn_free_locked(kmx_conn_t *conn)
-{
-        int             valid   = !mxlnd_endpoint_addr_null(conn->mxk_epa);
-        kmx_peer_t      *peer   = conn->mxk_peer;
-
-        CDEBUG(D_NET, "freeing conn 0x%p *****\n", conn);
-        LASSERT (cfs_list_empty (&conn->mxk_tx_credit_queue) &&
-                 cfs_list_empty (&conn->mxk_tx_free_queue) &&
-                 cfs_list_empty (&conn->mxk_pending));
-        if (!cfs_list_empty(&conn->mxk_list)) {
-                cfs_list_del_init(&conn->mxk_list);
-                if (peer->mxp_conn == conn) {
-                        peer->mxp_conn = NULL;
-                        if (valid) {
-                                kmx_conn_t      *temp   = NULL;
-
-                                mx_get_endpoint_addr_context(conn->mxk_epa,
-                                                             (void **) &temp);
-                                if (conn == temp) {
-                                        mx_set_endpoint_addr_context(conn->mxk_epa,
-                                                                     (void *) NULL);
-                                }
-                        }
-                        /* unlink from global list and drop its ref */
-                        cfs_list_del_init(&peer->mxp_list);
-                        mxlnd_peer_decref(peer);
-                }
-        }
-        mxlnd_peer_decref(peer); /* drop conn's ref to peer */
-        if (conn->mxk_rx_pages) {
-                LASSERT (conn->mxk_rxs != NULL);
-                mxlnd_free_pages(conn->mxk_rx_pages);
-        }
-        if (conn->mxk_rxs) {
-                int             i       = 0;
-                kmx_ctx_t       *rx     = NULL;
-
-                for (i = 0; i < MXLND_RX_MSGS(); i++) {
-                        rx = &conn->mxk_rxs[i];
-                        if (rx->mxc_seg_list != NULL) {
-                                LASSERT(rx->mxc_nseg > 0);
-                                MXLND_FREE(rx->mxc_seg_list,
-                                           rx->mxc_nseg *
-                                           sizeof(*rx->mxc_seg_list));
-                        }
-                }
-                MXLND_FREE(conn->mxk_rxs, MXLND_RX_MSGS() * sizeof(kmx_ctx_t));
-        }
-
-        MXLND_FREE(conn, sizeof (*conn));
-        return;
-}
-
-
-int
-mxlnd_conn_cancel_pending_rxs(kmx_conn_t *conn)
-{
-        int             found   = 0;
-        int             count   = 0;
-        kmx_ctx_t       *ctx    = NULL;
-        kmx_ctx_t       *next   = NULL;
-        mx_return_t     mxret   = MX_SUCCESS;
-        u32             result  = 0;
-
-        do {
-                found = 0;
-               spin_lock(&conn->mxk_lock);
-                cfs_list_for_each_entry_safe(ctx, next, &conn->mxk_pending,
-                                             mxc_list) {
-                        cfs_list_del_init(&ctx->mxc_list);
-                        if (ctx->mxc_type == MXLND_REQ_RX) {
-                                found = 1;
-                                mxret = mx_cancel(kmxlnd_data.kmx_endpt,
-                                                  &ctx->mxc_mxreq,
-                                                  &result);
-                                if (mxret != MX_SUCCESS) {
-                                        CNETERR("mx_cancel() returned %s (%d)\n", mx_strerror(mxret), mxret);
-                                }
-                                if (result == 1) {
-                                        ctx->mxc_errno = -ECONNABORTED;
-                                        ctx->mxc_state = MXLND_CTX_CANCELED;
-                                       spin_unlock(&conn->mxk_lock);
-                                       spin_lock(&kmxlnd_data.kmx_conn_lock);
-                                        /* we may be holding the global lock;
-                                         * move it to the orphan list so that
-                                         * connd can free it */
-                                        cfs_list_add_tail(&ctx->mxc_list,
-                                                          &kmxlnd_data.kmx_orphan_msgs);
-                                        count++;
-                                       spin_unlock(&kmxlnd_data.kmx_conn_lock);
-                                       spin_lock(&conn->mxk_lock);
-                               }
-                               break;
-                       }
-               }
-               spin_unlock(&conn->mxk_lock);
-       } while (found);
-
-       return count;
-}
-
-int
-mxlnd_cancel_queued_txs(kmx_conn_t *conn)
-{
-       int             count   = 0;
-       cfs_list_t      *tmp    = NULL;
-
-       spin_lock(&conn->mxk_lock);
-        while (!cfs_list_empty(&conn->mxk_tx_free_queue) ||
-               !cfs_list_empty(&conn->mxk_tx_credit_queue)) {
-
-                kmx_ctx_t       *tx     = NULL;
-
-                if (!cfs_list_empty(&conn->mxk_tx_free_queue)) {
-                        tmp = &conn->mxk_tx_free_queue;
-                } else {
-                        tmp = &conn->mxk_tx_credit_queue;
-                }
-
-                tx = cfs_list_entry(tmp->next, kmx_ctx_t, mxc_list);
-                cfs_list_del_init(&tx->mxc_list);
-               spin_unlock(&conn->mxk_lock);
-               tx->mxc_errno = -ECONNABORTED;
-               tx->mxc_state = MXLND_CTX_CANCELED;
-               /* move to orphan list and then abort */
-               spin_lock(&kmxlnd_data.kmx_conn_lock);
-               cfs_list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_orphan_msgs);
-               spin_unlock(&kmxlnd_data.kmx_conn_lock);
-               count++;
-               spin_lock(&conn->mxk_lock);
-       }
-       spin_unlock(&conn->mxk_lock);
-
-       return count;
-}
-
-void
-mxlnd_send_message(mx_endpoint_addr_t epa, u8 msg_type, int error, u64 cookie)
-{
-        u64 match = (((u64) msg_type) << MXLND_MSG_OFFSET) |
-                    (((u64) error) << MXLND_ERROR_OFFSET) | cookie;
-
-        mx_kisend(kmxlnd_data.kmx_endpt, NULL, 0, MX_PIN_PHYSICAL,
-                  epa, match, NULL, NULL);
-        return;
-}
-
-/**
- * mxlnd_conn_disconnect - shutdown a connection
- * @conn - a kmx_conn pointer
- * @mx_dis - call mx_disconnect()
- * @send_bye - send peer a BYE msg
- *
- * This function sets the status to DISCONNECT, completes queued
- * txs with failure, calls mx_disconnect, which will complete
- * pending txs and matched rxs with failure.
- */
-void
-mxlnd_conn_disconnect(kmx_conn_t *conn, int mx_dis, int send_bye)
-{
-       mx_endpoint_addr_t      epa     = conn->mxk_epa;
-       int                     valid   = !mxlnd_endpoint_addr_null(epa);
-       int                     count   = 0;
-
-       spin_lock(&conn->mxk_lock);
-       if (conn->mxk_status == MXLND_CONN_DISCONNECT) {
-               spin_unlock(&conn->mxk_lock);
-               return;
-       }
-       mxlnd_set_conn_status(conn, MXLND_CONN_DISCONNECT);
-       conn->mxk_timeout = 0;
-       spin_unlock(&conn->mxk_lock);
-
-       count = mxlnd_cancel_queued_txs(conn);
-       count += mxlnd_conn_cancel_pending_rxs(conn);
-
-       if (count) /* let connd call kmxlnd_abort_msgs() */
-               up(&kmxlnd_data.kmx_conn_sem);
-
-        if (send_bye && valid &&
-            conn->mxk_peer->mxp_nid != kmxlnd_data.kmx_ni->ni_nid) {
-                /* send a BYE to the peer */
-                CDEBUG(D_NET, "%s: sending a BYE msg to %s\n", __func__,
-                                libcfs_nid2str(conn->mxk_peer->mxp_nid));
-                mxlnd_send_message(epa, MXLND_MSG_BYE, 0, 0);
-                /* wait to allow the peer to ack our message */
-                mxlnd_sleep(msecs_to_jiffies(20));
-        }
-
-       if (atomic_read(&kmxlnd_data.kmx_shutdown) != 1) {
-               unsigned long   last_msg        = 0;
-
-                /* notify LNET that we are giving up on this peer */
-                if (cfs_time_after(conn->mxk_last_rx, conn->mxk_last_tx))
-                        last_msg = conn->mxk_last_rx;
-                else
-                        last_msg = conn->mxk_last_tx;
-
-                lnet_notify(kmxlnd_data.kmx_ni, conn->mxk_peer->mxp_nid, 0, last_msg);
-
-                if (mx_dis && valid &&
-                    (memcmp(&epa, &kmxlnd_data.kmx_epa, sizeof(epa)) != 0))
-                        mx_disconnect(kmxlnd_data.kmx_endpt, epa);
-        }
-        mxlnd_conn_decref(conn); /* drop the owning peer's reference */
-
-        return;
-}
-
-/**
- * mxlnd_conn_alloc - allocate and initialize a new conn struct
- * @connp - address of a kmx_conn pointer
- * @peer - owning kmx_peer
- *
- * Returns 0 on success and -ENOMEM on failure
- */
-int
-mxlnd_conn_alloc_locked(kmx_conn_t **connp, kmx_peer_t *peer)
-{
-        int             i       = 0;
-        int             ret     = 0;
-        int             ipage   = 0;
-        int             offset  = 0;
-        void           *addr    = NULL;
-        kmx_conn_t     *conn    = NULL;
-        kmx_pages_t    *pages   = NULL;
-        struct page    *page    = NULL;
-        kmx_ctx_t      *rx      = NULL;
-
-        LASSERT(peer != NULL);
-
-        MXLND_ALLOC(conn, sizeof (*conn));
-        if (conn == NULL) {
-                CNETERR("Cannot allocate conn\n");
-                return -ENOMEM;
-        }
-        CDEBUG(D_NET, "allocated conn 0x%p for peer 0x%p\n", conn, peer);
-
-        memset(conn, 0, sizeof(*conn));
-
-        ret = mxlnd_alloc_pages(&pages, MXLND_RX_MSG_PAGES());
-        if (ret != 0) {
-                CERROR("Can't allocate rx pages\n");
-                MXLND_FREE(conn, sizeof(*conn));
-                return -ENOMEM;
-        }
-        conn->mxk_rx_pages = pages;
-
-        MXLND_ALLOC(conn->mxk_rxs, MXLND_RX_MSGS() * sizeof(kmx_ctx_t));
-        if (conn->mxk_rxs == NULL) {
-                CERROR("Can't allocate %d rx descriptors\n", MXLND_RX_MSGS());
-                mxlnd_free_pages(pages);
-                MXLND_FREE(conn, sizeof(*conn));
-                return -ENOMEM;
-        }
-
-        memset(conn->mxk_rxs, 0, MXLND_RX_MSGS() * sizeof(kmx_ctx_t));
-
-       conn->mxk_peer = peer;
-       CFS_INIT_LIST_HEAD(&conn->mxk_list);
-       CFS_INIT_LIST_HEAD(&conn->mxk_zombie);
-       atomic_set(&conn->mxk_refcount, 2); /* ref for owning peer
-                                                  and one for the caller */
-       if (peer->mxp_nid == kmxlnd_data.kmx_ni->ni_nid) {
-               u64     nic_id  = 0ULL;
-                u32     ep_id   = 0;
-
-                /* this is localhost, set the epa and status as up */
-                mxlnd_set_conn_status(conn, MXLND_CONN_READY);
-                conn->mxk_epa = kmxlnd_data.kmx_epa;
-                mx_set_endpoint_addr_context(conn->mxk_epa, (void *) conn);
-                peer->mxp_reconnect_time = 0;
-                mx_decompose_endpoint_addr(kmxlnd_data.kmx_epa, &nic_id, &ep_id);
-                peer->mxp_nic_id = nic_id;
-                peer->mxp_ep_id = ep_id;
-                conn->mxk_incarnation = kmxlnd_data.kmx_incarnation;
-                conn->mxk_timeout = 0;
-        } else {
-                /* conn->mxk_incarnation = 0 - will be set by peer */
-                /* conn->mxk_sid = 0 - will be set by peer */
-                mxlnd_set_conn_status(conn, MXLND_CONN_INIT);
-                /* mxk_epa - to be set after mx_iconnect() */
-        }
-       spin_lock_init(&conn->mxk_lock);
-        /* conn->mxk_timeout = 0 */
-        /* conn->mxk_last_tx = 0 */
-        /* conn->mxk_last_rx = 0 */
-        CFS_INIT_LIST_HEAD(&conn->mxk_rx_idle);
-
-        conn->mxk_credits = *kmxlnd_tunables.kmx_peercredits;
-        /* mxk_outstanding = 0 */
-
-        CFS_INIT_LIST_HEAD(&conn->mxk_tx_credit_queue);
-        CFS_INIT_LIST_HEAD(&conn->mxk_tx_free_queue);
-        /* conn->mxk_ntx_msgs = 0 */
-        /* conn->mxk_ntx_data = 0 */
-        /* conn->mxk_ntx_posted = 0 */
-        /* conn->mxk_data_posted = 0 */
-        CFS_INIT_LIST_HEAD(&conn->mxk_pending);
-
-        for (i = 0; i < MXLND_RX_MSGS(); i++) {
-
-                rx = &conn->mxk_rxs[i];
-                rx->mxc_type = MXLND_REQ_RX;
-                CFS_INIT_LIST_HEAD(&rx->mxc_list);
-
-                /* map mxc_msg to page */
-                page = pages->mxg_pages[ipage];
-                addr = page_address(page);
-                LASSERT(addr != NULL);
-                rx->mxc_msg = (kmx_msg_t *)(addr + offset);
-                rx->mxc_seg.segment_ptr = MX_PA_TO_U64(virt_to_phys(rx->mxc_msg));
-
-                rx->mxc_conn = conn;
-                rx->mxc_peer = peer;
-                rx->mxc_nid = peer->mxp_nid;
-
-                mxlnd_ctx_init(rx);
-
-                offset += MXLND_MSG_SIZE;
-                LASSERT (offset <= PAGE_SIZE);
-
-                if (offset == PAGE_SIZE) {
-                        offset = 0;
-                        ipage++;
-                        LASSERT (ipage <= MXLND_RX_MSG_PAGES());
-                }
-
-                cfs_list_add_tail(&rx->mxc_list, &conn->mxk_rx_idle);
-        }
-
-        *connp = conn;
-
-        mxlnd_peer_addref(peer);        /* add a ref for this conn */
-
-        /* add to front of peer's conns list */
-        cfs_list_add(&conn->mxk_list, &peer->mxp_conns);
-        peer->mxp_conn = conn;
-        return 0;
-}
-
-int
-mxlnd_conn_alloc(kmx_conn_t **connp, kmx_peer_t *peer)
-{
-        int             ret     = 0;
-       rwlock_t   *g_lock  = &kmxlnd_data.kmx_global_lock;
-
-       write_lock(g_lock);
-        ret = mxlnd_conn_alloc_locked(connp, peer);
-       write_unlock(g_lock);
-        return ret;
-}
-
-int
-mxlnd_q_pending_ctx(kmx_ctx_t *ctx)
-{
-       int             ret     = 0;
-       kmx_conn_t      *conn   = ctx->mxc_conn;
-
-       ctx->mxc_state = MXLND_CTX_PENDING;
-       if (conn != NULL) {
-               spin_lock(&conn->mxk_lock);
-                if (conn->mxk_status >= MXLND_CONN_INIT) {
-                        cfs_list_add_tail(&ctx->mxc_list, &conn->mxk_pending);
-                        if (conn->mxk_timeout == 0 || ctx->mxc_deadline < conn->mxk_timeout) {
-                                conn->mxk_timeout = ctx->mxc_deadline;
-                        }
-                } else {
-                        ctx->mxc_state = MXLND_CTX_COMPLETED;
-                        ret = -1;
-                }
-               spin_unlock(&conn->mxk_lock);
-       }
-       return ret;
-}
-
-int
-mxlnd_deq_pending_ctx(kmx_ctx_t *ctx)
-{
-        LASSERT(ctx->mxc_state == MXLND_CTX_PENDING ||
-                ctx->mxc_state == MXLND_CTX_COMPLETED);
-        if (ctx->mxc_state != MXLND_CTX_PENDING &&
-            ctx->mxc_state != MXLND_CTX_COMPLETED) {
-                CNETERR("deq ctx->mxc_state = %s\n",
-                        mxlnd_ctxstate_to_str(ctx->mxc_state));
-        }
-        ctx->mxc_state = MXLND_CTX_COMPLETED;
-        if (!cfs_list_empty(&ctx->mxc_list)) {
-                kmx_conn_t      *conn = ctx->mxc_conn;
-                kmx_ctx_t       *next = NULL;
-
-                LASSERT(conn != NULL);
-               spin_lock(&conn->mxk_lock);
-                cfs_list_del_init(&ctx->mxc_list);
-                conn->mxk_timeout = 0;
-                if (!cfs_list_empty(&conn->mxk_pending)) {
-                        next = cfs_list_entry(conn->mxk_pending.next,
-                                              kmx_ctx_t, mxc_list);
-                        conn->mxk_timeout = next->mxc_deadline;
-                }
-               spin_unlock(&conn->mxk_lock);
-       }
-       return 0;
-}
-
-/**
- * mxlnd_peer_free - free the peer
- * @peer - a kmx_peer pointer
- *
- * The calling function should decrement the rxs, drain the tx queues and
- * remove the peer from the peers list first, then destroy it.
- */
-void
-mxlnd_peer_free(kmx_peer_t *peer)
-{
-       CDEBUG(D_NET, "freeing peer 0x%p %s\n", peer, libcfs_nid2str(peer->mxp_nid));
-
-       LASSERT (atomic_read(&peer->mxp_refcount) == 0);
-
-       if (!cfs_list_empty(&peer->mxp_list)) {
-               /* assume we are locked */
-               cfs_list_del_init(&peer->mxp_list);
-       }
-
-       MXLND_FREE(peer, sizeof (*peer));
-       atomic_dec(&kmxlnd_data.kmx_npeers);
-       return;
-}
-
-static int
-mxlnd_lookup_mac(u32 ip, u64 *tmp_id)
-{
-        int                     ret     = -EHOSTUNREACH;
-        unsigned char           *haddr  = NULL;
-        struct net_device       *dev    = NULL;
-        struct neighbour        *n      = NULL;
-        __be32                  dst_ip  = htonl(ip);
-
-        dev = dev_get_by_name(*kmxlnd_tunables.kmx_default_ipif);
-        if (dev == NULL)
-                return -ENODEV;
-
-        haddr = (unsigned char *) tmp_id + 2; /* MAC is only 6 bytes */
-
-        n = neigh_lookup(&arp_tbl, &dst_ip, dev);
-        if (n) {
-                n->used = jiffies;
-                if (n->nud_state & NUD_VALID) {
-                        memcpy(haddr, n->ha, dev->addr_len);
-                        neigh_release(n);
-                        ret = 0;
-                }
-        }
-
-        dev_put(dev);
-
-        return ret;
-}
-
-
-/* We only want the MAC address of the peer's Myricom NIC. We
- * require that each node has the IPoMX interface (myriN) up.
- * We will not pass any traffic over IPoMX, but it allows us
- * to get the MAC address. */
-static int
-mxlnd_ip2nic_id(u32 ip, u64 *nic_id, int tries)
-{
-        int                     ret     = 0;
-        int                     try     = 1;
-        int                     fatal   = 0;
-        u64                     tmp_id  = 0ULL;
-        cfs_socket_t            *sock   = NULL;
-
-        do {
-                CDEBUG(D_NET, "try %d of %d tries\n", try, tries);
-                ret = mxlnd_lookup_mac(ip, &tmp_id);
-                if (ret == 0) {
-                        break;
-                } else {
-                        /* not found, try to connect (force an arp) */
-                        ret = libcfs_sock_connect(&sock, &fatal, 0, 0, ip, 987);
-                        if (ret == -ECONNREFUSED) {
-                                /* peer is there, get the MAC address */
-                                mxlnd_lookup_mac(ip, &tmp_id);
-                                if (tmp_id != 0ULL)
-                                        ret = 0;
-                                break;
-                       } else if (ret == -EHOSTUNREACH && try < tries) {
-                               /* add a little backoff */
-                               CDEBUG(D_NET, "sleeping for %lu jiffies\n",
-                                      msecs_to_jiffies(MSEC_PER_SEC / 4));
-                               mxlnd_sleep(msecs_to_jiffies(MSEC_PER_SEC / 4));
-                       }
-                }
-        } while (try++ < tries);
-        CDEBUG(D_NET, "done trying. ret = %d\n", ret);
-
-        if (tmp_id == 0ULL)
-                ret = -EHOSTUNREACH;
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-        *nic_id = ___arch__swab64(tmp_id);
-#else
-        *nic_id = tmp_id;
-#endif
-        return ret;
-}
-
-/**
- * mxlnd_peer_alloc - allocate and initialize a new peer struct
- * @peerp - address of a kmx_peer pointer
- * @nid - LNET node id
- *
- * Returns 0 on success and -ENOMEM on failure
- */
-int
-mxlnd_peer_alloc(kmx_peer_t **peerp, lnet_nid_t nid, u32 board, u32 ep_id, u64 nic_id)
-{
-        int             ret     = 0;
-        u32             ip      = LNET_NIDADDR(nid);
-        kmx_peer_t     *peer    = NULL;
-
-        LASSERT (nid != LNET_NID_ANY && nid != 0LL);
-
-        MXLND_ALLOC(peer, sizeof (*peer));
-        if (peer == NULL) {
-                CNETERR("Cannot allocate peer for NID 0x%llx\n",
-                       nid);
-                return -ENOMEM;
-        }
-        CDEBUG(D_NET, "allocated peer 0x%p for NID 0x%llx\n", peer, nid);
-
-        memset(peer, 0, sizeof(*peer));
-
-       CFS_INIT_LIST_HEAD(&peer->mxp_list);
-       peer->mxp_nid = nid;
-       /* peer->mxp_ni unused - may be used for multi-rail */
-       atomic_set(&peer->mxp_refcount, 1);     /* ref for kmx_peers list */
-
-        peer->mxp_board = board;
-        peer->mxp_ep_id = ep_id;
-        peer->mxp_nic_id = nic_id;
-
-        CFS_INIT_LIST_HEAD(&peer->mxp_conns);
-        ret = mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds 2nd conn ref here... */
-        if (ret != 0) {
-                mxlnd_peer_decref(peer);
-                return ret;
-        }
-        CFS_INIT_LIST_HEAD(&peer->mxp_tx_queue);
-
-        if (peer->mxp_nic_id != 0ULL)
-                nic_id = peer->mxp_nic_id;
-
-        if (nic_id == 0ULL) {
-                ret = mxlnd_ip2nic_id(ip, &nic_id, 1);
-                if (ret == 0) {
-                        peer->mxp_nic_id = nic_id;
-                        mx_nic_id_to_board_number(nic_id, &peer->mxp_board);
-                }
-        }
-
-        peer->mxp_nic_id = nic_id; /* may be 0ULL if ip2nic_id() failed */
-
-        /* peer->mxp_reconnect_time = 0 */
-        /* peer->mxp_incompatible = 0 */
-
-        *peerp = peer;
-        return 0;
-}
-
-static inline kmx_peer_t *
-mxlnd_find_peer_by_nid_locked(lnet_nid_t nid)
-{
-        int             found   = 0;
-        int             hash    = 0;
-        kmx_peer_t      *peer   = NULL;
-
-        hash = mxlnd_nid_to_hash(nid);
-
-        cfs_list_for_each_entry(peer, &kmxlnd_data.kmx_peers[hash], mxp_list) {
-                if (peer->mxp_nid == nid) {
-                        found = 1;
-                        mxlnd_peer_addref(peer);
-                        break;
-                }
-        }
-        return (found ? peer : NULL);
-}
-
-static kmx_peer_t *
-mxlnd_find_peer_by_nid(lnet_nid_t nid, int create)
-{
-        int             ret     = 0;
-        int             hash    = 0;
-        kmx_peer_t      *peer   = NULL;
-        kmx_peer_t      *old    = NULL;
-       rwlock_t    *g_lock = &kmxlnd_data.kmx_global_lock;
-
-       read_lock(g_lock);
-        peer = mxlnd_find_peer_by_nid_locked(nid); /* adds peer ref */
-
-        if ((peer && peer->mxp_conn) || /* found peer with conn or */
-            (!peer && !create)) {       /* did not find peer and do not create one */
-               read_unlock(g_lock);
-                return peer;
-        }
-
-       read_unlock(g_lock);
-
-        /* if peer but _not_ conn */
-        if (peer && !peer->mxp_conn) {
-                if (create) {
-                       write_lock(g_lock);
-                        if (!peer->mxp_conn) { /* check again */
-                                /* create the conn */
-                                ret = mxlnd_conn_alloc_locked(&peer->mxp_conn, peer);
-                                if (ret != 0) {
-                                        /* we tried, return the peer only.
-                                         * the caller needs to see if the conn exists */
-                                        CNETERR("%s: %s could not alloc conn\n",
-                                        __func__, libcfs_nid2str(peer->mxp_nid));
-                                } else {
-                                        /* drop extra conn ref */
-                                        mxlnd_conn_decref(peer->mxp_conn);
-                                }
-                        }
-                       write_unlock(g_lock);
-                }
-                return peer;
-        }
-
-        /* peer not found and we need to create one */
-        hash = mxlnd_nid_to_hash(nid);
-
-        /* create peer (and conn) */
-        /* adds conn ref for peer and one for this function */
-        ret = mxlnd_peer_alloc(&peer, nid, *kmxlnd_tunables.kmx_board,
-                               *kmxlnd_tunables.kmx_ep_id, 0ULL);
-        if (ret != 0) /* no memory, peer is NULL */
-                return NULL;
-
-       write_lock(g_lock);
-
-        /* look again */
-        old = mxlnd_find_peer_by_nid_locked(nid);
-        if (old) {
-                /* someone already created one */
-                mxlnd_conn_decref(peer->mxp_conn); /* drop ref taken above.. */
-                mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref */
-                mxlnd_peer_decref(peer);
-                peer = old;
-        } else {
-               /* no other peer, use this one */
-               cfs_list_add_tail(&peer->mxp_list,
-                                 &kmxlnd_data.kmx_peers[hash]);
-               atomic_inc(&kmxlnd_data.kmx_npeers);
-               mxlnd_peer_addref(peer);
-               mxlnd_conn_decref(peer->mxp_conn); /* drop ref from peer_alloc */
-        }
-
-       write_unlock(g_lock);
-
-        return peer;
-}
-
-static inline int
-mxlnd_tx_requires_credit(kmx_ctx_t *tx)
-{
-        return (tx->mxc_msg_type == MXLND_MSG_EAGER ||
-                tx->mxc_msg_type == MXLND_MSG_GET_REQ ||
-                tx->mxc_msg_type == MXLND_MSG_PUT_REQ ||
-                tx->mxc_msg_type == MXLND_MSG_NOOP);
-}
-
-/**
- * mxlnd_init_msg - set type and number of bytes
- * @msg - msg pointer
- * @type - of message
- * @body_nob - bytes in msg body
- */
-static inline void
-mxlnd_init_msg(kmx_msg_t *msg, u8 type, int body_nob)
-{
-        msg->mxm_type = type;
-        msg->mxm_nob  = offsetof(kmx_msg_t, mxm_u) + body_nob;
-}
-
-static inline void
-mxlnd_init_tx_msg (kmx_ctx_t *tx, u8 type, int body_nob, lnet_nid_t nid)
-{
-        int             nob     = offsetof (kmx_msg_t, mxm_u) + body_nob;
-        kmx_msg_t       *msg    = NULL;
-
-        LASSERT (tx != NULL);
-        LASSERT (nob <= MXLND_MSG_SIZE);
-
-        tx->mxc_nid = nid;
-        /* tx->mxc_peer should have already been set if we know it */
-        tx->mxc_msg_type = type;
-        tx->mxc_nseg = 1;
-        /* tx->mxc_seg.segment_ptr is already pointing to mxc_page */
-        tx->mxc_seg.segment_length = nob;
-        tx->mxc_pin_type = MX_PIN_PHYSICAL;
-
-        msg = tx->mxc_msg;
-        msg->mxm_type = type;
-        msg->mxm_nob  = nob;
-
-        return;
-}
-
-static inline __u32
-mxlnd_cksum (void *ptr, int nob)
-{
-        char  *c  = ptr;
-        __u32  sum = 0;
-
-        while (nob-- > 0)
-                sum = ((sum << 1) | (sum >> 31)) + *c++;
-
-        /* ensure I don't return 0 (== no checksum) */
-        return (sum == 0) ? 1 : sum;
-}
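
This rolling rotate-and-add checksum is recomputed on the receive side in mxlnd_unpack_msg(), always with mxm_cksum zeroed first. A stand-alone round trip over an arbitrary header, using a local copy of the function rather than the kernel code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local copy of the rotate-and-add checksum for illustration. */
static uint32_t ex_cksum(void *ptr, int nob)
{
        char     *c   = ptr;
        uint32_t  sum = 0;

        while (nob-- > 0)
                sum = ((sum << 1) | (sum >> 31)) + *c++;

        return (sum == 0) ? 1 : sum;    /* 0 means "no checksum" */
}

struct ex_hdr {
        uint32_t cksum;
        char     payload[16];
};

int main(void)
{
        struct ex_hdr h;
        uint32_t      wire;

        memset(&h, 0, sizeof(h));
        strcpy(h.payload, "hello");

        h.cksum = 0;                    /* sender computes with field zeroed */
        wire    = ex_cksum(&h, sizeof(h));

        h.cksum = 0;                    /* receiver zeroes and recomputes */
        printf("checksum %s\n", ex_cksum(&h, sizeof(h)) == wire ? "ok" : "BAD");
        return 0;
}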
-
-/**
- * mxlnd_pack_msg_locked - complete msg info
- * @tx - msg to send
- */
-static inline void
-mxlnd_pack_msg_locked(kmx_ctx_t *tx)
-{
-        kmx_msg_t       *msg    = tx->mxc_msg;
-
-        /* type and nob should already be set in init_msg() */
-        msg->mxm_magic    = MXLND_MSG_MAGIC;
-        msg->mxm_version  = MXLND_MSG_VERSION;
-        /*   mxm_type */
-        /* don't use mxlnd_tx_requires_credit() since we want PUT_ACK to
-         * return credits as well */
-        if (tx->mxc_msg_type != MXLND_MSG_CONN_REQ &&
-            tx->mxc_msg_type != MXLND_MSG_CONN_ACK) {
-                msg->mxm_credits  = tx->mxc_conn->mxk_outstanding;
-                tx->mxc_conn->mxk_outstanding = 0;
-        } else {
-                msg->mxm_credits  = 0;
-        }
-        /*   mxm_nob */
-        msg->mxm_cksum    = 0;
-        msg->mxm_srcnid   = kmxlnd_data.kmx_ni->ni_nid;
-        msg->mxm_srcstamp = kmxlnd_data.kmx_incarnation;
-        msg->mxm_dstnid   = tx->mxc_nid;
-        /* if it is a new peer, the dststamp will be 0 */
-        msg->mxm_dststamp = tx->mxc_conn->mxk_incarnation;
-
-        if (*kmxlnd_tunables.kmx_cksum) {
-                msg->mxm_cksum = mxlnd_cksum(msg, msg->mxm_nob);
-        }
-}
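
The credits piggy-backed here implement simple flow control: every non-handshake message carries the number of receives the sender has completed since its last send (mxk_outstanding), and the peer adds that amount back to its send credits (mxk_credits). A toy stand-alone model of that exchange, independent of the driver's structures and locking:

#include <stdio.h>

/* Toy model: one credit is spent per message sent, and accumulated
 * credits ride back on the next message in the other direction, as
 * mxlnd_pack_msg_locked() does with mxk_outstanding. */
struct ex_side {
        int credits;        /* messages I may still send              */
        int outstanding;    /* receives I have not yet credited back  */
};

static int ex_send(struct ex_side *me, struct ex_side *peer)
{
        int returned;

        if (me->credits == 0)
                return -1;                  /* would have to queue */
        me->credits--;

        returned = me->outstanding;         /* piggy-back and reset */
        me->outstanding = 0;

        peer->outstanding++;                /* peer consumed one of my slots */
        peer->credits += returned;          /* peer regains returned credits */
        return 0;
}

int main(void)
{
        struct ex_side a = { 8, 0 };
        struct ex_side b = { 8, 0 };
        int i;

        for (i = 0; i < 5; i++)
                ex_send(&a, &b);
        ex_send(&b, &a);                    /* b's reply returns 5 credits to a */

        printf("a: credits=%d  b: credits=%d\n", a.credits, b.credits);
        return 0;
}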
-
-int
-mxlnd_unpack_msg(kmx_msg_t *msg, int nob)
-{
-        const int hdr_size      = offsetof(kmx_msg_t, mxm_u);
-        __u32     msg_cksum     = 0;
-        int       flip          = 0;
-        int       msg_nob       = 0;
-
-        /* 6 bytes are enough to have received magic + version */
-        if (nob < 6) {
-                CNETERR("not enough bytes for magic + hdr: %d\n", nob);
-                return -EPROTO;
-        }
-
-        if (msg->mxm_magic == MXLND_MSG_MAGIC) {
-                flip = 0;
-        } else if (msg->mxm_magic == __swab32(MXLND_MSG_MAGIC)) {
-                flip = 1;
-        } else {
-                CNETERR("Bad magic: %08x\n", msg->mxm_magic);
-                return -EPROTO;
-        }
-
-        if (msg->mxm_version !=
-            (flip ? __swab16(MXLND_MSG_VERSION) : MXLND_MSG_VERSION)) {
-                CNETERR("Bad version: %d\n", msg->mxm_version);
-                return -EPROTO;
-        }
-
-        if (nob < hdr_size) {
-                CNETERR("not enough for a header: %d\n", nob);
-                return -EPROTO;
-        }
-
-        msg_nob = flip ? __swab32(msg->mxm_nob) : msg->mxm_nob;
-        if (msg_nob > nob) {
-                CNETERR("Short message: got %d, wanted %d\n", nob, msg_nob);
-                return -EPROTO;
-        }
-
-        /* checksum must be computed with mxm_cksum zero and BEFORE anything
-         * gets flipped */
-        msg_cksum = flip ? __swab32(msg->mxm_cksum) : msg->mxm_cksum;
-        msg->mxm_cksum = 0;
-        if (msg_cksum != 0 && msg_cksum != mxlnd_cksum(msg, msg_nob)) {
-                CNETERR("Bad checksum\n");
-                return -EPROTO;
-        }
-        msg->mxm_cksum = msg_cksum;
-
-        if (flip) {
-                /* leave magic unflipped as a clue to peer endianness */
-                __swab16s(&msg->mxm_version);
-                CLASSERT (sizeof(msg->mxm_type) == 1);
-                CLASSERT (sizeof(msg->mxm_credits) == 1);
-                msg->mxm_nob = msg_nob;
-                __swab64s(&msg->mxm_srcnid);
-                __swab64s(&msg->mxm_srcstamp);
-                __swab64s(&msg->mxm_dstnid);
-                __swab64s(&msg->mxm_dststamp);
-        }
-
-        if (msg->mxm_srcnid == LNET_NID_ANY) {
-                CNETERR("Bad src nid: %s\n", libcfs_nid2str(msg->mxm_srcnid));
-                return -EPROTO;
-        }
-
-        switch (msg->mxm_type) {
-        default:
-                CNETERR("Unknown message type %x\n", msg->mxm_type);
-                return -EPROTO;
-
-        case MXLND_MSG_NOOP:
-                break;
-
-        case MXLND_MSG_EAGER:
-                if (msg_nob < offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0])) {
-                        CNETERR("Short EAGER: %d(%d)\n", msg_nob,
-                               (int)offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0]));
-                        return -EPROTO;
-                }
-                break;
-
-        case MXLND_MSG_PUT_REQ:
-                if (msg_nob < hdr_size + sizeof(msg->mxm_u.put_req)) {
-                        CNETERR("Short PUT_REQ: %d(%d)\n", msg_nob,
-                               (int)(hdr_size + sizeof(msg->mxm_u.put_req)));
-                        return -EPROTO;
-                }
-                if (flip)
-                        __swab64s(&msg->mxm_u.put_req.mxprm_cookie);
-                break;
-
-        case MXLND_MSG_PUT_ACK:
-                if (msg_nob < hdr_size + sizeof(msg->mxm_u.put_ack)) {
-                        CNETERR("Short PUT_ACK: %d(%d)\n", msg_nob,
-                               (int)(hdr_size + sizeof(msg->mxm_u.put_ack)));
-                        return -EPROTO;
-                }
-                if (flip) {
-                        __swab64s(&msg->mxm_u.put_ack.mxpam_src_cookie);
-                        __swab64s(&msg->mxm_u.put_ack.mxpam_dst_cookie);
-                }
-                break;
-
-        case MXLND_MSG_GET_REQ:
-                if (msg_nob < hdr_size + sizeof(msg->mxm_u.get_req)) {
-                        CNETERR("Short GET_REQ: %d(%d)\n", msg_nob,
-                               (int)(hdr_size + sizeof(msg->mxm_u.get_req)));
-                        return -EPROTO;
-                }
-                if (flip) {
-                        __swab64s(&msg->mxm_u.get_req.mxgrm_cookie);
-                }
-                break;
-
-        case MXLND_MSG_CONN_REQ:
-        case MXLND_MSG_CONN_ACK:
-                if (msg_nob < hdr_size + sizeof(msg->mxm_u.conn_req)) {
-                        CNETERR("Short connreq/ack: %d(%d)\n", msg_nob,
-                               (int)(hdr_size + sizeof(msg->mxm_u.conn_req)));
-                        return -EPROTO;
-                }
-                if (flip) {
-                        __swab32s(&msg->mxm_u.conn_req.mxcrm_queue_depth);
-                        __swab32s(&msg->mxm_u.conn_req.mxcrm_eager_size);
-                }
-                break;
-        }
-        return 0;
-}
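The receive-side verification above saves the wire checksum, zeroes mxm_cksum, recomputes over the unflipped bytes, and only then restores the field. A minimal userspace sketch of that pattern follows; the additive checksum is only a stand-in for the driver's mxlnd_cksum(), which is defined elsewhere in the driver.

#include <stdint.h>
#include <string.h>

struct demo_hdr {
        uint32_t magic;
        uint32_t cksum;
        uint32_t nob;
};

/* stand-in checksum; the real driver uses mxlnd_cksum() */
static uint32_t demo_cksum(const void *p, int nob)
{
        const uint8_t *b = p;
        uint32_t sum = 0;

        while (nob-- > 0)
                sum += *b++;
        return sum;
}

/* verify as the receiver does: save, zero, recompute, compare, restore */
static int demo_verify(struct demo_hdr *h)
{
        uint32_t wire = h->cksum;
        int ok;

        h->cksum = 0;
        ok = (wire == 0) || (wire == demo_cksum(h, sizeof(*h)));
        h->cksum = wire;
        return ok ? 0 : -1;
}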
-
-
-/**
- * mxlnd_recv_msg
- * @lntmsg - the LNET msg that this is continuing. If EAGER, then NULL.
- * @rx
- * @msg_type
- * @cookie
- * @length - length of incoming message
- *
- * The caller gets the rx and sets nid, peer and conn if known.
- *
- * Returns 0 on success and -1 on failure
- */
-int
-mxlnd_recv_msg(lnet_msg_t *lntmsg, kmx_ctx_t *rx, u8 msg_type, u64 cookie, u32 length)
-{
-        int             ret     = 0;
-        mx_return_t     mxret   = MX_SUCCESS;
-        uint64_t        mask    = ~(MXLND_ERROR_MASK);
-
-        rx->mxc_msg_type = msg_type;
-        rx->mxc_lntmsg[0] = lntmsg; /* may be NULL if EAGER */
-        rx->mxc_cookie = cookie;
-        /* rx->mxc_match may already be set */
-        /* rx->mxc_seg.segment_ptr is already set */
-        rx->mxc_seg.segment_length = length;
-        ret = mxlnd_q_pending_ctx(rx);
-        if (ret == -1) {
-                /* the caller is responsible for calling conn_decref() if needed */
-                return -1;
-        }
-        mxret = mx_kirecv(kmxlnd_data.kmx_endpt, &rx->mxc_seg, 1, MX_PIN_PHYSICAL,
-                          cookie, mask, (void *) rx, &rx->mxc_mxreq);
-        if (mxret != MX_SUCCESS) {
-                mxlnd_deq_pending_ctx(rx);
-                CNETERR("mx_kirecv() failed with %s (%d)\n",
-                        mx_strerror(mxret), (int) mxret);
-                return -1;
-        }
-        return 0;
-}
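mxlnd packs the message type, an error code and a cookie into the 64-bit MX match value, and posts receives with mask = ~(MXLND_ERROR_MASK) so a reply that carries an error code still matches the posted receive. The sketch below illustrates that packing; the shift values (60 and 52) and masks are inferred from the comments in this file ("ensure top 12 bits are 0", "Send the error in bits 52-59 and the cookie in bits 0-51"), not copied from mxlnd.h.

#include <stdint.h>

#define DEMO_MSG_OFFSET    60                              /* 4-bit msg type */
#define DEMO_ERROR_OFFSET  52                              /* 8-bit error */
#define DEMO_ERROR_MASK    (0xffULL << DEMO_ERROR_OFFSET)
#define DEMO_COOKIE_MASK   ((1ULL << DEMO_ERROR_OFFSET) - 1) /* 52-bit cookie */

static uint64_t demo_create_match(uint8_t type, uint8_t error, uint64_t cookie)
{
        /* cookie must fit in the low 52 bits (cf. the LASSERTs in this file) */
        return ((uint64_t)type << DEMO_MSG_OFFSET) |
               ((uint64_t)error << DEMO_ERROR_OFFSET) |
               (cookie & DEMO_COOKIE_MASK);
}

static void demo_parse_match(uint64_t match, uint8_t *type,
                             uint8_t *error, uint64_t *cookie)
{
        *type   = match >> DEMO_MSG_OFFSET;
        *error  = (match & DEMO_ERROR_MASK) >> DEMO_ERROR_OFFSET;
        *cookie = match & DEMO_COOKIE_MASK;
}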
-
-
-/**
- * mxlnd_unexpected_recv - this is the callback function that will handle
- *                         unexpected receives
- * @context - NULL, ignore
- * @source - the peer's mx_endpoint_addr_t
- * @match_value - the msg's bits, should be MXLND_MSG_EAGER
- * @length - length of incoming message
- * @data_if_available - used for CONN_[REQ|ACK]
- *
- * If it is an eager-sized msg, we will call recv_msg() with the actual
- * length. If it is a large message, we will call recv_msg() with a
- * length of 0 bytes to drop it because we should never have a large,
- * unexpected message.
- *
- * NOTE - The MX library blocks until this function completes. Make it as fast as
- * possible. DO NOT allocate memory which can block!
- *
- * If we cannot get a rx or the conn is closed, drop the message on the floor
- * (i.e. recv 0 bytes and ignore).
- */
-mx_unexp_handler_action_t
-mxlnd_unexpected_recv(void *context, mx_endpoint_addr_t source,
-                 uint64_t match_value, uint32_t length, void *data_if_available)
-{
-        int             ret             = 0;
-        kmx_ctx_t       *rx             = NULL;
-        mx_ksegment_t   seg;
-        u8              msg_type        = 0;
-        u8              error           = 0;
-        u64             cookie          = 0ULL;
-        kmx_conn_t      *conn           = NULL;
-        kmx_peer_t      *peer           = NULL;
-        u64             nic_id          = 0ULL;
-        u32             ep_id           = 0;
-        u32             sid             = 0;
-
-        /* TODO this will change to the net struct */
-        if (context != NULL) {
-                CNETERR("non-NULL context\n");
-        }
-
-#if MXLND_DEBUG
-        CDEBUG(D_NET, "bits=0x%llx length=%d\n", match_value, length);
-#endif
-
-        mx_decompose_endpoint_addr2(source, &nic_id, &ep_id, &sid);
-        mxlnd_parse_match(match_value, &msg_type, &error, &cookie);
-       read_lock(&kmxlnd_data.kmx_global_lock);
-        mx_get_endpoint_addr_context(source, (void **) &conn);
-        if (conn) {
-                mxlnd_conn_addref(conn); /* add ref for this function */
-                peer = conn->mxk_peer;
-        }
-       read_unlock(&kmxlnd_data.kmx_global_lock);
-
-        if (msg_type == MXLND_MSG_BYE) {
-                if (conn) {
-                        CDEBUG(D_NET, "peer %s sent BYE msg\n",
-                                        libcfs_nid2str(peer->mxp_nid));
-                        mxlnd_conn_disconnect(conn, 1, 0);
-                        mxlnd_conn_decref(conn); /* drop ref taken above */
-                }
-                return MX_RECV_FINISHED;
-        }
-
-        if (msg_type == MXLND_MSG_CONN_REQ) {
-                kmx_connparams_t       *cp      = NULL;
-                const int       expected        = offsetof(kmx_msg_t, mxm_u) +
-                                                  sizeof(kmx_connreq_msg_t);
-
-                if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */
-                if (unlikely(length != expected || !data_if_available)) {
-                        CNETERR("received invalid CONN_REQ from %llx "
-                                "length=%d (expected %d)\n", nic_id, length, expected);
-                        mxlnd_send_message(source, MXLND_MSG_CONN_ACK, EPROTO, 0);
-                        return MX_RECV_FINISHED;
-                }
-
-                ret = mxlnd_connparams_alloc(&cp, context, source, match_value, length,
-                                         conn, peer, data_if_available);
-                if (unlikely(ret != 0)) {
-                        CNETERR("unable to alloc CONN_REQ from %llx:%d\n",
-                                nic_id, ep_id);
-                        mxlnd_send_message(source, MXLND_MSG_CONN_ACK, ENOMEM, 0);
-                        return MX_RECV_FINISHED;
-                }
-               spin_lock(&kmxlnd_data.kmx_conn_lock);
-               cfs_list_add_tail(&cp->mxr_list, &kmxlnd_data.kmx_conn_reqs);
-               spin_unlock(&kmxlnd_data.kmx_conn_lock);
-               up(&kmxlnd_data.kmx_conn_sem);
-               return MX_RECV_FINISHED;
-       }
-        if (msg_type == MXLND_MSG_CONN_ACK) {
-                kmx_connparams_t  *cp           = NULL;
-                const int       expected        = offsetof(kmx_msg_t, mxm_u) +
-                                                  sizeof(kmx_connreq_msg_t);
-
-                LASSERT(conn);
-                if (unlikely(error != 0)) {
-                        CNETERR("received CONN_ACK from %s with error -%d\n",
-                               libcfs_nid2str(peer->mxp_nid), (int) error);
-                        mxlnd_conn_disconnect(conn, 1, 0);
-                } else if (unlikely(length != expected || !data_if_available)) {
-                        CNETERR("received %s CONN_ACK from %s "
-                               "length=%d (expected %d)\n",
-                               data_if_available ? "short" : "missing",
-                               libcfs_nid2str(peer->mxp_nid), length, expected);
-                        mxlnd_conn_disconnect(conn, 1, 1);
-                } else {
-                        /* peer is ready for messages */
-                        ret = mxlnd_connparams_alloc(&cp, context, source, match_value, length,
-                                         conn, peer, data_if_available);
-                        if (unlikely(ret != 0)) {
-                                CNETERR("unable to alloc kmx_connparams_t"
-                                               " from %llx:%d\n", nic_id, ep_id);
-                                mxlnd_conn_disconnect(conn, 1, 1);
-                       } else {
-                               spin_lock(&kmxlnd_data.kmx_conn_lock);
-                               cfs_list_add_tail(&cp->mxr_list,
-                                                 &kmxlnd_data.kmx_conn_reqs);
-                               spin_unlock(&kmxlnd_data.kmx_conn_lock);
-                               up(&kmxlnd_data.kmx_conn_sem);
-                       }
-                }
-                mxlnd_conn_decref(conn); /* drop ref taken above */
-
-                return MX_RECV_FINISHED;
-        }
-
-        /* Handle unexpected messages (PUT_REQ and GET_REQ) */
-
-        LASSERT(peer != NULL && conn != NULL);
-
-        rx = mxlnd_get_idle_rx(conn);
-        if (rx != NULL) {
-                if (length <= MXLND_MSG_SIZE) {
-                        ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, length);
-                } else {
-                        CNETERR("unexpected large receive with "
-                                "match_value=0x%llx length=%d\n",
-                                match_value, length);
-                        ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, 0);
-                }
-
-                if (ret == 0) {
-                        /* hold conn ref until rx completes */
-                        rx->mxc_conn = conn;
-                        rx->mxc_peer = peer;
-                        rx->mxc_nid = peer->mxp_nid;
-                } else {
-                        CNETERR("could not post receive\n");
-                        mxlnd_put_idle_rx(rx);
-                }
-        }
-
-        /* Encountered error, drop incoming message on the floor */
-        /* We could use MX_RECV_FINISHED but posting the receive of 0 bytes
-         * uses the standard code path and acks the sender normally */
-
-        if (rx == NULL || ret != 0) {
-                mxlnd_conn_decref(conn); /* drop ref taken above */
-                if (rx == NULL) {
-                        CNETERR("no idle rxs available - dropping rx"
-                                " 0x%llx from %s\n", match_value,
-                                libcfs_nid2str(peer->mxp_nid));
-                } else {
-                        /* ret != 0 */
-                        CNETERR("disconnected peer - dropping rx\n");
-                }
-                seg.segment_ptr = 0ULL;
-                seg.segment_length = 0;
-                mx_kirecv(kmxlnd_data.kmx_endpt, &seg, 1, MX_PIN_PHYSICAL,
-                          match_value, ~0ULL, NULL, NULL);
-        }
-
-        return MX_RECV_CONTINUE;
-}
-
-
-int
-mxlnd_get_peer_info(int index, lnet_nid_t *nidp, int *count)
-{
-        int              i      = 0;
-        int              ret    = -ENOENT;
-        kmx_peer_t      *peer   = NULL;
-
-       read_lock(&kmxlnd_data.kmx_global_lock);
-        for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                cfs_list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i],
-                                        mxp_list) {
-                       if (index-- == 0) {
-                               *nidp = peer->mxp_nid;
-                               *count = atomic_read(&peer->mxp_refcount);
-                               ret = 0;
-                               break;
-                       }
-                }
-        }
-       read_unlock(&kmxlnd_data.kmx_global_lock);
-
-        return ret;
-}
-
-void
-mxlnd_del_peer_locked(kmx_peer_t *peer)
-{
-        if (peer->mxp_conn) {
-                mxlnd_conn_disconnect(peer->mxp_conn, 1, 1);
-        } else {
-                cfs_list_del_init(&peer->mxp_list); /* remove from the global list */
-                mxlnd_peer_decref(peer); /* drop global list ref */
-        }
-        return;
-}
-
-int
-mxlnd_del_peer(lnet_nid_t nid)
-{
-        int             i       = 0;
-        int             ret     = 0;
-        kmx_peer_t      *peer   = NULL;
-        kmx_peer_t      *next   = NULL;
-
-        if (nid != LNET_NID_ANY) {
-                peer = mxlnd_find_peer_by_nid(nid, 0); /* adds peer ref */
-        }
-       write_lock(&kmxlnd_data.kmx_global_lock);
-        if (nid != LNET_NID_ANY) {
-                if (peer == NULL) {
-                        ret = -ENOENT;
-                } else {
-                        mxlnd_peer_decref(peer); /* and drops it */
-                        mxlnd_del_peer_locked(peer);
-                }
-        } else { /* LNET_NID_ANY */
-                for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                        cfs_list_for_each_entry_safe(peer, next,
-                                                     &kmxlnd_data.kmx_peers[i],
-                                                     mxp_list) {
-                                mxlnd_del_peer_locked(peer);
-                        }
-                }
-        }
-       write_unlock(&kmxlnd_data.kmx_global_lock);
-
-        return ret;
-}
-
-kmx_conn_t *
-mxlnd_get_conn_by_idx(int index)
-{
-        int              i      = 0;
-        kmx_peer_t      *peer   = NULL;
-        kmx_conn_t      *conn   = NULL;
-
-       read_lock(&kmxlnd_data.kmx_global_lock);
-        for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                cfs_list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i],
-                                        mxp_list) {
-                        cfs_list_for_each_entry(conn, &peer->mxp_conns,
-                                                mxk_list) {
-                                if (index-- > 0) {
-                                        continue;
-                                }
-
-                                mxlnd_conn_addref(conn); /* add ref here, dec in ctl() */
-                               read_unlock(&kmxlnd_data.kmx_global_lock);
-                                return conn;
-                        }
-                }
-        }
-       read_unlock(&kmxlnd_data.kmx_global_lock);
-
-        return NULL;
-}
-
-void
-mxlnd_close_matching_conns_locked(kmx_peer_t *peer)
-{
-        kmx_conn_t      *conn   = NULL;
-        kmx_conn_t      *next   = NULL;
-
-        cfs_list_for_each_entry_safe(conn, next, &peer->mxp_conns, mxk_list)
-                mxlnd_conn_disconnect(conn, 0, 1);
-
-        return;
-}
-
-int
-mxlnd_close_matching_conns(lnet_nid_t nid)
-{
-        int             i       = 0;
-        int             ret     = 0;
-        kmx_peer_t      *peer   = NULL;
-
-       write_lock(&kmxlnd_data.kmx_global_lock);
-        if (nid != LNET_NID_ANY) {
-                peer = mxlnd_find_peer_by_nid_locked(nid); /* adds peer ref */
-                if (peer == NULL) {
-                        ret = -ENOENT;
-                } else {
-                        mxlnd_close_matching_conns_locked(peer);
-                        mxlnd_peer_decref(peer); /* and drops it here */
-                }
-        } else { /* LNET_NID_ANY */
-                for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                        cfs_list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i],
-                                                mxp_list)
-                                mxlnd_close_matching_conns_locked(peer);
-                }
-        }
-       write_unlock(&kmxlnd_data.kmx_global_lock);
-
-        return ret;
-}
-
-/**
- * mxlnd_ctl - modify MXLND parameters
- * @ni - LNET interface handle
- * @cmd - the ioctl command
- * @arg - the ioctl data
- */
-int
-mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
-        struct libcfs_ioctl_data *data  = arg;
-        int                       ret   = -EINVAL;
-
-        LASSERT (ni == kmxlnd_data.kmx_ni);
-
-        switch (cmd) {
-        case IOC_LIBCFS_GET_PEER: {
-                lnet_nid_t      nid     = 0;
-                int             count   = 0;
-
-                ret = mxlnd_get_peer_info(data->ioc_count, &nid, &count);
-                data->ioc_nid    = nid;
-                data->ioc_count  = count;
-                break;
-        }
-        case IOC_LIBCFS_DEL_PEER: {
-                ret = mxlnd_del_peer(data->ioc_nid);
-                break;
-        }
-        case IOC_LIBCFS_GET_CONN: {
-                kmx_conn_t      *conn = NULL;
-
-                conn = mxlnd_get_conn_by_idx(data->ioc_count);
-                if (conn == NULL) {
-                        ret = -ENOENT;
-                } else {
-                        ret = 0;
-                        data->ioc_nid = conn->mxk_peer->mxp_nid;
-                        mxlnd_conn_decref(conn); /* dec ref taken in get_conn_by_idx() */
-                }
-                break;
-        }
-        case IOC_LIBCFS_CLOSE_CONNECTION: {
-                ret = mxlnd_close_matching_conns(data->ioc_nid);
-                break;
-        }
-        default:
-                CNETERR("unknown ctl(%d)\n", cmd);
-                break;
-        }
-
-        return ret;
-}
-
-/**
- * mxlnd_peer_queue_tx_locked - add the tx to the peer's tx queue
- * @tx
- *
- * Add the tx to the peer's msg or data queue. The caller has locked the peer.
- */
-void
-mxlnd_peer_queue_tx_locked(kmx_ctx_t *tx)
-{
-        u8              msg_type        = tx->mxc_msg_type;
-        kmx_conn_t      *conn           = tx->mxc_conn;
-
-        LASSERT (msg_type != 0);
-        LASSERT (tx->mxc_nid != 0);
-        LASSERT (tx->mxc_peer != NULL);
-        LASSERT (tx->mxc_conn != NULL);
-
-        tx->mxc_incarnation = conn->mxk_incarnation;
-
-        if (msg_type != MXLND_MSG_PUT_DATA &&
-            msg_type != MXLND_MSG_GET_DATA) {
-                /* msg style tx */
-                if (mxlnd_tx_requires_credit(tx)) {
-                        cfs_list_add_tail(&tx->mxc_list,
-                                          &conn->mxk_tx_credit_queue);
-                        conn->mxk_ntx_msgs++;
-                } else if (msg_type == MXLND_MSG_CONN_REQ ||
-                           msg_type == MXLND_MSG_CONN_ACK) {
-                        /* put conn msgs at the front of the queue */
-                        cfs_list_add(&tx->mxc_list, &conn->mxk_tx_free_queue);
-                } else {
-                        /* PUT_ACK, PUT_NAK */
-                        cfs_list_add_tail(&tx->mxc_list,
-                                          &conn->mxk_tx_free_queue);
-                        conn->mxk_ntx_msgs++;
-                }
-        } else {
-                /* data style tx */
-                cfs_list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue);
-                conn->mxk_ntx_data++;
-        }
-
-        return;
-}
-
-/**
- * mxlnd_peer_queue_tx - add the tx to the peer's tx queue
- * @tx
- *
- * Add the tx to the peer's msg or data queue
- */
-static inline void
-mxlnd_peer_queue_tx(kmx_ctx_t *tx)
-{
-       LASSERT(tx->mxc_peer != NULL);
-       LASSERT(tx->mxc_conn != NULL);
-       spin_lock(&tx->mxc_conn->mxk_lock);
-       mxlnd_peer_queue_tx_locked(tx);
-       spin_unlock(&tx->mxc_conn->mxk_lock);
-
-       return;
-}
-
-/**
- * mxlnd_queue_tx - add the tx to the global tx queue
- * @tx
- *
- * Add the tx to the global queue and up the tx_queue_sem
- */
-void
-mxlnd_queue_tx(kmx_ctx_t *tx)
-{
-        kmx_peer_t *peer   = tx->mxc_peer;
-        LASSERT (tx->mxc_nid != 0);
-
-        if (peer != NULL) {
-                if (peer->mxp_incompatible &&
-                    tx->mxc_msg_type != MXLND_MSG_CONN_ACK) {
-                        /* let this fail now */
-                        tx->mxc_errno = -ECONNABORTED;
-                        mxlnd_conn_decref(peer->mxp_conn);
-                        mxlnd_put_idle_tx(tx);
-                        return;
-                }
-                if (tx->mxc_conn == NULL) {
-                        int             ret     = 0;
-                        kmx_conn_t      *conn   = NULL;
-
-                        ret = mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref for tx... */
-                        if (ret != 0) {
-                                tx->mxc_errno = ret;
-                                mxlnd_put_idle_tx(tx);
-                                goto done;
-                        }
-                        tx->mxc_conn = conn;
-                        mxlnd_peer_decref(peer); /* and takes it from peer */
-                }
-                LASSERT(tx->mxc_conn != NULL);
-                mxlnd_peer_queue_tx(tx);
-                mxlnd_check_sends(peer);
-        } else {
-               spin_lock(&kmxlnd_data.kmx_tx_queue_lock);
-               cfs_list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_queue);
-               spin_unlock(&kmxlnd_data.kmx_tx_queue_lock);
-               up(&kmxlnd_data.kmx_tx_queue_sem);
-       }
-done:
-       return;
-}
-
-int
-mxlnd_setup_iov(kmx_ctx_t *ctx, u32 niov, struct iovec *iov, u32 offset, u32 nob)
-{
-        int             i                       = 0;
-        int             sum                     = 0;
-        int             old_sum                 = 0;
-        int             nseg                    = 0;
-        int             first_iov               = -1;
-        int             first_iov_offset        = 0;
-        int             first_found             = 0;
-        int             last_iov                = -1;
-        int             last_iov_length         = 0;
-        mx_ksegment_t  *seg                     = NULL;
-
-        if (niov == 0) return 0;
-        LASSERT(iov != NULL);
-
-        for (i = 0; i < niov; i++) {
-                sum = old_sum + (u32) iov[i].iov_len;
-                if (!first_found && (sum > offset)) {
-                        first_iov = i;
-                        first_iov_offset = offset - old_sum;
-                        first_found = 1;
-                        sum = (u32) iov[i].iov_len - first_iov_offset;
-                        old_sum = 0;
-                }
-                if (sum >= nob) {
-                        last_iov = i;
-                        last_iov_length = (u32) iov[i].iov_len - (sum - nob);
-                        if (first_iov == last_iov) last_iov_length -= first_iov_offset;
-                        break;
-                }
-                old_sum = sum;
-        }
-        LASSERT(first_iov >= 0 && last_iov >= first_iov);
-        nseg = last_iov - first_iov + 1;
-        LASSERT(nseg > 0);
-
-        MXLND_ALLOC(seg, nseg * sizeof(*seg));
-        if (seg == NULL) {
-                CNETERR("MXLND_ALLOC() failed\n");
-                return -1;
-        }
-        memset(seg, 0, nseg * sizeof(*seg));
-        ctx->mxc_nseg = nseg;
-        sum = 0;
-        for (i = 0; i < nseg; i++) {
-                seg[i].segment_ptr = MX_PA_TO_U64(virt_to_phys(iov[first_iov + i].iov_base));
-                seg[i].segment_length = (u32) iov[first_iov + i].iov_len;
-                if (i == 0) {
-                        seg[i].segment_ptr += (u64) first_iov_offset;
-                        seg[i].segment_length -= (u32) first_iov_offset;
-                }
-                if (i == (nseg - 1)) {
-                        seg[i].segment_length = (u32) last_iov_length;
-                }
-                sum += seg[i].segment_length;
-        }
-        ctx->mxc_seg_list = seg;
-        ctx->mxc_pin_type = MX_PIN_PHYSICAL;
-#ifdef MX_PIN_FULLPAGES
-        ctx->mxc_pin_type |= MX_PIN_FULLPAGES;
-#endif
-        LASSERT(nob == sum);
-        return 0;
-}
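mxlnd_setup_iov() above walks the iovec list to find which fragment the requested offset starts in (and how far into it) before building MX segments. Below is a standalone userspace sketch of the same offset-to-fragment walk, using an ordinary struct iovec and illustrative buffer sizes; it is a simplified demo, not driver code.

#include <stdio.h>
#include <sys/uio.h>

/* find which fragment a byte offset falls in, and the offset within it */
static int find_frag(const struct iovec *iov, int niov,
                     size_t offset, int *idx, size_t *frag_off)
{
        size_t sum = 0;
        int i;

        for (i = 0; i < niov; i++) {
                if (offset < sum + iov[i].iov_len) {
                        *idx = i;
                        *frag_off = offset - sum;
                        return 0;
                }
                sum += iov[i].iov_len;
        }
        return -1;      /* offset past the end of the iovec */
}

int main(void)
{
        char a[100], b[300], c[50];
        struct iovec iov[] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
                { .iov_base = c, .iov_len = sizeof(c) },
        };
        int idx;
        size_t off;

        if (find_frag(iov, 3, 250, &idx, &off) == 0)
                printf("offset 250 -> iov[%d] + %zu\n", idx, off);
        return 0;
}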
-
-int
-mxlnd_setup_kiov(kmx_ctx_t *ctx, u32 niov, lnet_kiov_t *kiov, u32 offset, u32 nob)
-{
-        int             i                       = 0;
-        int             sum                     = 0;
-        int             old_sum                 = 0;
-        int             nseg                    = 0;
-        int             first_kiov              = -1;
-        int             first_kiov_offset       = 0;
-        int             first_found             = 0;
-        int             last_kiov               = -1;
-        int             last_kiov_length        = 0;
-        mx_ksegment_t  *seg                     = NULL;
-
-        if (niov == 0) return 0;
-        LASSERT(kiov != NULL);
-
-        for (i = 0; i < niov; i++) {
-                sum = old_sum + kiov[i].kiov_len;
-                if (i == 0) sum -= kiov[i].kiov_offset;
-                if (!first_found && (sum > offset)) {
-                        first_kiov = i;
-                        first_kiov_offset = offset - old_sum;
-                        if (i == 0) first_kiov_offset = kiov[i].kiov_offset;
-                        first_found = 1;
-                        sum = kiov[i].kiov_len - first_kiov_offset;
-                        old_sum = 0;
-                }
-                if (sum >= nob) {
-                        last_kiov = i;
-                        last_kiov_length = kiov[i].kiov_len - (sum - nob);
-                        if (first_kiov == last_kiov) last_kiov_length -= first_kiov_offset;
-                        break;
-                }
-                old_sum = sum;
-        }
-        LASSERT(first_kiov >= 0 && last_kiov >= first_kiov);
-        nseg = last_kiov - first_kiov + 1;
-        LASSERT(nseg > 0);
-
-        MXLND_ALLOC(seg, nseg * sizeof(*seg));
-        if (seg == NULL) {
-                CNETERR("MXLND_ALLOC() failed\n");
-                return -1;
-        }
-        memset(seg, 0, nseg * sizeof(*seg));
-        ctx->mxc_nseg = nseg;
-        sum = 0;
-        for (i = 0; i < nseg; i++) {
-               seg[i].segment_ptr =
-                       page_to_phys(kiov[first_kiov + i].kiov_page);
-                seg[i].segment_length = kiov[first_kiov + i].kiov_len;
-                if (i == 0) {
-                        seg[i].segment_ptr += (u64) first_kiov_offset;
-                        /* we have to add back the original kiov_offset */
-                        seg[i].segment_length -= first_kiov_offset +
-                                                 kiov[first_kiov].kiov_offset;
-                }
-                if (i == (nseg - 1)) {
-                        seg[i].segment_length = last_kiov_length;
-                }
-                sum += seg[i].segment_length;
-        }
-        ctx->mxc_seg_list = seg;
-        ctx->mxc_pin_type = MX_PIN_PHYSICAL;
-#ifdef MX_PIN_FULLPAGES
-        ctx->mxc_pin_type |= MX_PIN_FULLPAGES;
-#endif
-        LASSERT(nob == sum);
-        return 0;
-}
-
-void
-mxlnd_send_nak(kmx_ctx_t *tx, lnet_nid_t nid, int type, int status, __u64 cookie)
-{
-        LASSERT(type == MXLND_MSG_PUT_ACK);
-        mxlnd_init_tx_msg(tx, type, sizeof(kmx_putack_msg_t), tx->mxc_nid);
-        tx->mxc_cookie = cookie;
-        tx->mxc_msg->mxm_u.put_ack.mxpam_src_cookie = cookie;
-        tx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie = ((u64) status << MXLND_ERROR_OFFSET); /* error code */
-        tx->mxc_match = mxlnd_create_match(tx, status);
-
-        mxlnd_queue_tx(tx);
-}
-
-
-/**
- * mxlnd_send_data - get tx, map [k]iov, queue tx
- * @ni
- * @lntmsg
- * @peer
- * @msg_type
- * @cookie
- *
- * This sets up the DATA send for PUT or GET.
- *
- * On success, it queues the tx; on failure it calls lnet_finalize()
- */
-void
-mxlnd_send_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, kmx_peer_t *peer, u8 msg_type, u64 cookie)
-{
-        int                     ret     = 0;
-        lnet_process_id_t       target  = lntmsg->msg_target;
-        unsigned int            niov    = lntmsg->msg_niov;
-        struct iovec           *iov     = lntmsg->msg_iov;
-        lnet_kiov_t            *kiov    = lntmsg->msg_kiov;
-        unsigned int            offset  = lntmsg->msg_offset;
-        unsigned int            nob     = lntmsg->msg_len;
-        kmx_ctx_t              *tx      = NULL;
-
-        LASSERT(lntmsg != NULL);
-        LASSERT(peer != NULL);
-        LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA);
-        LASSERT((cookie>>MXLND_ERROR_OFFSET) == 0);
-
-        tx = mxlnd_get_idle_tx();
-        if (tx == NULL) {
-                CNETERR("Can't allocate %s tx for %s\n",
-                        msg_type == MXLND_MSG_PUT_DATA ? "PUT_DATA" : "GET_DATA",
-                        libcfs_nid2str(target.nid));
-                goto failed_0;
-        }
-        tx->mxc_nid = target.nid;
-        /* NOTE called when we have a ref on the conn, get one for this tx */
-        mxlnd_conn_addref(peer->mxp_conn);
-        tx->mxc_peer = peer;
-        tx->mxc_conn = peer->mxp_conn;
-        tx->mxc_msg_type = msg_type;
-        tx->mxc_lntmsg[0] = lntmsg;
-        tx->mxc_cookie = cookie;
-        tx->mxc_match = mxlnd_create_match(tx, 0);
-
-        /* This sets up the mx_ksegment_t to send the DATA payload */
-        if (nob == 0) {
-                /* do not setup the segments */
-                CNETERR("nob = 0; why didn't we use an EAGER reply "
-                        "to %s?\n", libcfs_nid2str(target.nid));
-                ret = 0;
-        } else if (kiov == NULL) {
-                ret = mxlnd_setup_iov(tx, niov, iov, offset, nob);
-        } else {
-                ret = mxlnd_setup_kiov(tx, niov, kiov, offset, nob);
-        }
-        if (ret != 0) {
-                CNETERR("Can't setup send DATA for %s\n",
-                        libcfs_nid2str(target.nid));
-                tx->mxc_errno = -EIO;
-                goto failed_1;
-        }
-        mxlnd_queue_tx(tx);
-        return;
-
-failed_1:
-        mxlnd_conn_decref(peer->mxp_conn);
-        mxlnd_put_idle_tx(tx);
-        return;
-
-failed_0:
-        CNETERR("no tx avail\n");
-        lnet_finalize(ni, lntmsg, -EIO);
-        return;
-}
-
-/**
- * mxlnd_recv_data - map [k]iov, post rx
- * @ni
- * @lntmsg
- * @rx
- * @msg_type
- * @cookie
- *
- * This sets up the DATA receive for PUT or GET.
- *
- * On success, it returns 0; on failure it returns -1
- */
-int
-mxlnd_recv_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, kmx_ctx_t *rx, u8 msg_type, u64 cookie)
-{
-        int                     ret     = 0;
-        lnet_process_id_t       target  = lntmsg->msg_target;
-        unsigned int            niov    = lntmsg->msg_niov;
-        struct iovec           *iov     = lntmsg->msg_iov;
-        lnet_kiov_t            *kiov    = lntmsg->msg_kiov;
-        unsigned int            offset  = lntmsg->msg_offset;
-        unsigned int            nob     = lntmsg->msg_len;
-        mx_return_t             mxret   = MX_SUCCESS;
-        u64                     mask    = ~(MXLND_ERROR_MASK);
-
-        /* above assumes MXLND_MSG_PUT_DATA */
-        if (msg_type == MXLND_MSG_GET_DATA) {
-                niov = lntmsg->msg_md->md_niov;
-                iov = lntmsg->msg_md->md_iov.iov;
-                kiov = lntmsg->msg_md->md_iov.kiov;
-                offset = 0;
-                nob = lntmsg->msg_md->md_length;
-        }
-
-        LASSERT(lntmsg != NULL);
-        LASSERT(rx != NULL);
-        LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA);
-        LASSERT((cookie>>MXLND_ERROR_OFFSET) == 0); /* ensure top 12 bits are 0 */
-
-        rx->mxc_msg_type = msg_type;
-        rx->mxc_state = MXLND_CTX_PENDING;
-        rx->mxc_nid = target.nid;
-        /* if posting a GET_DATA, we may not yet know the peer */
-        if (rx->mxc_peer != NULL) {
-                rx->mxc_conn = rx->mxc_peer->mxp_conn;
-        }
-        rx->mxc_lntmsg[0] = lntmsg;
-        rx->mxc_cookie = cookie;
-        rx->mxc_match = mxlnd_create_match(rx, 0);
-        /* This sets up the mx_ksegment_t to receive the DATA payload */
-        if (kiov == NULL) {
-                ret = mxlnd_setup_iov(rx, niov, iov, offset, nob);
-        } else {
-                ret = mxlnd_setup_kiov(rx, niov, kiov, offset, nob);
-        }
-        if (msg_type == MXLND_MSG_GET_DATA) {
-                rx->mxc_lntmsg[1] = lnet_create_reply_msg(kmxlnd_data.kmx_ni, lntmsg);
-                if (rx->mxc_lntmsg[1] == NULL) {
-                        CNETERR("Can't create reply for GET -> %s\n",
-                                libcfs_nid2str(target.nid));
-                        ret = -1;
-                }
-        }
-        if (ret != 0) {
-                CNETERR("Can't setup %s rx for %s\n",
-                       msg_type == MXLND_MSG_PUT_DATA ? "PUT_DATA" : "GET_DATA",
-                       libcfs_nid2str(target.nid));
-                return -1;
-        }
-        ret = mxlnd_q_pending_ctx(rx);
-        if (ret == -1) {
-                return -1;
-        }
-        CDEBUG(D_NET, "receiving %s 0x%llx\n", mxlnd_msgtype_to_str(msg_type), rx->mxc_cookie);
-        mxret = mx_kirecv(kmxlnd_data.kmx_endpt,
-                          rx->mxc_seg_list, rx->mxc_nseg,
-                          rx->mxc_pin_type, rx->mxc_match,
-                          mask, (void *) rx,
-                          &rx->mxc_mxreq);
-        if (mxret != MX_SUCCESS) {
-                if (rx->mxc_conn != NULL) {
-                        mxlnd_deq_pending_ctx(rx);
-                }
-                CNETERR("mx_kirecv() failed with %d for %s\n",
-                        (int) mxret, libcfs_nid2str(target.nid));
-                return -1;
-        }
-
-        return 0;
-}
-
-/**
- * mxlnd_send - the LND required send function
- * @ni
- * @private
- * @lntmsg
- *
- * This must not block. Since we may not have a peer struct for the receiver,
- * the tx may be appended to a global tx list; the tx_queued thread's
- * semaphore is then upped to notify it of the new send.
- */
-int
-mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
-        int                     ret             = 0;
-        int                     type            = lntmsg->msg_type;
-        lnet_hdr_t             *hdr             = &lntmsg->msg_hdr;
-        lnet_process_id_t       target          = lntmsg->msg_target;
-        lnet_nid_t              nid             = target.nid;
-        int                     target_is_router = lntmsg->msg_target_is_router;
-        int                     routing         = lntmsg->msg_routing;
-        unsigned int            payload_niov    = lntmsg->msg_niov;
-        struct iovec           *payload_iov     = lntmsg->msg_iov;
-        lnet_kiov_t            *payload_kiov    = lntmsg->msg_kiov;
-        unsigned int            payload_offset  = lntmsg->msg_offset;
-        unsigned int            payload_nob     = lntmsg->msg_len;
-        kmx_ctx_t              *tx              = NULL;
-        kmx_msg_t              *txmsg           = NULL;
-        kmx_ctx_t              *rx              = (kmx_ctx_t *) private; /* for REPLY */
-        kmx_ctx_t              *rx_data         = NULL;
-        kmx_conn_t             *conn            = NULL;
-        int                     nob             = 0;
-        uint32_t                length          = 0;
-        kmx_peer_t             *peer            = NULL;
-       rwlock_t                *g_lock         =&kmxlnd_data.kmx_global_lock;
-
-        CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
-                       payload_nob, payload_niov, libcfs_id2str(target));
-
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= LNET_MAX_IOV);
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
-        /* private is used on LNET_GET_REPLY only, NULL for all other cases */
-
-        /* NOTE we may not know the peer if it is the very first PUT_REQ or GET_REQ
-         * to a new peer, so create one if not found */
-        peer = mxlnd_find_peer_by_nid(nid, 1); /* adds peer ref */
-        if (peer == NULL || peer->mxp_conn == NULL) {
-                /* we could not find it nor could we create one or
-                 * one exists but we cannot create a conn,
-                 * fail this message */
-                if (peer) {
-                        /* found peer without conn, drop ref taken above */
-                        LASSERT(peer->mxp_conn == NULL);
-                        mxlnd_peer_decref(peer);
-                }
-                return -ENOMEM;
-        }
-
-        /* we have a peer with a conn */
-
-        if (unlikely(peer->mxp_incompatible)) {
-                mxlnd_peer_decref(peer); /* drop ref taken above */
-        } else {
-               read_lock(g_lock);
-               conn = peer->mxp_conn;
-               if (conn && conn->mxk_status != MXLND_CONN_DISCONNECT)
-                       mxlnd_conn_addref(conn);
-               else
-                       conn = NULL;
-               read_unlock(g_lock);
-                mxlnd_peer_decref(peer); /* drop peer ref taken above */
-                if (!conn)
-                        return -ENOTCONN;
-        }
-
-        LASSERT(peer && conn);
-
-        CDEBUG(D_NET, "%s: peer 0x%llx is 0x%p\n", __func__, nid, peer);
-
-        switch (type) {
-        case LNET_MSG_ACK:
-                LASSERT (payload_nob == 0);
-                break;
-
-        case LNET_MSG_REPLY:
-        case LNET_MSG_PUT:
-                /* Is the payload small enough not to need DATA? */
-                nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob]);
-                if (nob <= MXLND_MSG_SIZE)
-                        break;                  /* send EAGER */
-
-                tx = mxlnd_get_idle_tx();
-                if (unlikely(tx == NULL)) {
-                        CNETERR("Can't allocate %s tx for %s\n",
-                                type == LNET_MSG_PUT ? "PUT" : "REPLY",
-                                libcfs_nid2str(nid));
-                        if (conn) mxlnd_conn_decref(conn);
-                        return -ENOMEM;
-                }
-
-                tx->mxc_peer = peer;
-                tx->mxc_conn = conn;
-                /* we added a conn ref above */
-                mxlnd_init_tx_msg (tx, MXLND_MSG_PUT_REQ, sizeof(kmx_putreq_msg_t), nid);
-                txmsg = tx->mxc_msg;
-                txmsg->mxm_u.put_req.mxprm_hdr = *hdr;
-                txmsg->mxm_u.put_req.mxprm_cookie = tx->mxc_cookie;
-                tx->mxc_match = mxlnd_create_match(tx, 0);
-
-                /* we must post a receive _before_ sending the request.
-                 * we need to determine how much to receive, it will be either
-                 * a put_ack or a put_nak. The put_ack is larger, so use it. */
-
-                rx = mxlnd_get_idle_rx(conn);
-                if (unlikely(rx == NULL)) {
-                        CNETERR("Can't allocate rx for PUT_ACK for %s\n",
-                                libcfs_nid2str(nid));
-                        mxlnd_put_idle_tx(tx);
-                        if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */
-                        return -ENOMEM;
-                }
-                rx->mxc_nid = nid;
-                rx->mxc_peer = peer;
-                mxlnd_conn_addref(conn); /* for this rx */
-                rx->mxc_conn = conn;
-                rx->mxc_msg_type = MXLND_MSG_PUT_ACK;
-                rx->mxc_cookie = tx->mxc_cookie;
-                rx->mxc_match = mxlnd_create_match(rx, 0);
-
-                length = offsetof(kmx_msg_t, mxm_u) + sizeof(kmx_putack_msg_t);
-                ret = mxlnd_recv_msg(lntmsg, rx, MXLND_MSG_PUT_ACK, rx->mxc_match, length);
-                if (unlikely(ret != 0)) {
-                        CNETERR("recv_msg() failed for PUT_ACK for %s\n",
-                                           libcfs_nid2str(nid));
-                        rx->mxc_lntmsg[0] = NULL;
-                        mxlnd_put_idle_rx(rx);
-                        mxlnd_put_idle_tx(tx);
-                        mxlnd_conn_decref(conn); /* for the rx... */
-                        mxlnd_conn_decref(conn); /* and for the tx */
-                        return -EHOSTUNREACH;
-                }
-
-                mxlnd_queue_tx(tx);
-                return 0;
-
-        case LNET_MSG_GET:
-                if (routing || target_is_router)
-                        break;                  /* send EAGER */
-
-                /* is the REPLY message too small for DATA? */
-                nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[lntmsg->msg_md->md_length]);
-                if (nob <= MXLND_MSG_SIZE)
-                        break;                  /* send EAGER */
-
-                /* get tx (we need the cookie) , post rx for incoming DATA,
-                 * then post GET_REQ tx */
-                tx = mxlnd_get_idle_tx();
-                if (unlikely(tx == NULL)) {
-                        CNETERR("Can't allocate GET tx for %s\n",
-                                libcfs_nid2str(nid));
-                        mxlnd_conn_decref(conn); /* for the ref taken above */
-                        return -ENOMEM;
-                }
-                rx_data = mxlnd_get_idle_rx(conn);
-                if (unlikely(rx_data == NULL)) {
-                        CNETERR("Can't allocate DATA rx for %s\n",
-                                libcfs_nid2str(nid));
-                        mxlnd_put_idle_tx(tx);
-                        mxlnd_conn_decref(conn); /* for the ref taken above */
-                        return -ENOMEM;
-                }
-                rx_data->mxc_peer = peer;
-                /* NOTE no need to lock peer before adding conn ref since we took
-                 * a conn ref for the tx (it cannot be freed between there and here ) */
-                mxlnd_conn_addref(conn); /* for the rx_data */
-                rx_data->mxc_conn = conn;
-
-                ret = mxlnd_recv_data(ni, lntmsg, rx_data, MXLND_MSG_GET_DATA, tx->mxc_cookie);
-                if (unlikely(ret != 0)) {
-                        CNETERR("Can't setup GET sink for %s\n",
-                                libcfs_nid2str(nid));
-                        mxlnd_put_idle_rx(rx_data);
-                        mxlnd_put_idle_tx(tx);
-                        mxlnd_conn_decref(conn); /* for the rx_data... */
-                        mxlnd_conn_decref(conn); /* and for the tx */
-                        return -EIO;
-                }
-
-                tx->mxc_peer = peer;
-                tx->mxc_conn = conn;
-                /* conn ref taken above */
-                mxlnd_init_tx_msg(tx, MXLND_MSG_GET_REQ, sizeof(kmx_getreq_msg_t), nid);
-                txmsg = tx->mxc_msg;
-                txmsg->mxm_u.get_req.mxgrm_hdr = *hdr;
-                txmsg->mxm_u.get_req.mxgrm_cookie = tx->mxc_cookie;
-                tx->mxc_match = mxlnd_create_match(tx, 0);
-
-                mxlnd_queue_tx(tx);
-                return 0;
-
-        default:
-                LBUG();
-                mxlnd_conn_decref(conn); /* drop ref taken above */
-                return -EIO;
-        }
-
-        /* send EAGER */
-
-        LASSERT (offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob])
-                <= MXLND_MSG_SIZE);
-
-        tx = mxlnd_get_idle_tx();
-        if (unlikely(tx == NULL)) {
-                CNETERR("Can't send %s to %s: tx descs exhausted\n",
-                        mxlnd_lnetmsg_to_str(type), libcfs_nid2str(nid));
-                mxlnd_conn_decref(conn); /* drop ref taken above */
-                return -ENOMEM;
-        }
-
-        tx->mxc_peer = peer;
-        tx->mxc_conn = conn;
-        /* conn ref taken above */
-        nob = offsetof(kmx_eager_msg_t, mxem_payload[payload_nob]);
-        mxlnd_init_tx_msg (tx, MXLND_MSG_EAGER, nob, nid);
-        tx->mxc_match = mxlnd_create_match(tx, 0);
-
-        txmsg = tx->mxc_msg;
-        txmsg->mxm_u.eager.mxem_hdr = *hdr;
-
-        if (payload_kiov != NULL)
-                lnet_copy_kiov2flat(MXLND_MSG_SIZE, txmsg,
-                            offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
-                            payload_niov, payload_kiov, payload_offset, payload_nob);
-        else
-                lnet_copy_iov2flat(MXLND_MSG_SIZE, txmsg,
-                            offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
-                            payload_niov, payload_iov, payload_offset, payload_nob);
-
-        tx->mxc_lntmsg[0] = lntmsg;              /* finalise lntmsg on completion */
-        mxlnd_queue_tx(tx);
-        return 0;
-}
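The EAGER-versus-rendezvous decision in mxlnd_send() above hinges on whether the payload still fits inside a single eager message once the wire header is accounted for. The sketch below shows that test in simplified form; DEMO_HDR_SIZE and DEMO_MSG_SIZE are stand-ins for the driver's real header layout and MXLND_MSG_SIZE, whose actual values live in mxlnd.h.

#include <stddef.h>

#define DEMO_MSG_SIZE  4096                               /* assumed eager msg size */
#define DEMO_HDR_SIZE  (4 * sizeof(unsigned long long))   /* stand-in wire header */

/* returns nonzero if a payload of 'nob' bytes can be sent inline (EAGER);
 * mirrors offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[nob]) <= MXLND_MSG_SIZE */
static int demo_fits_eager(unsigned int nob)
{
        return DEMO_HDR_SIZE + nob <= DEMO_MSG_SIZE;
}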
-
-/**
- * mxlnd_recv - the LND required recv function
- * @ni
- * @private
- * @lntmsg
- * @delayed
- * @niov
- * @kiov
- * @offset
- * @mlen
- * @rlen
- *
- * This must not block.
- */
-int
-mxlnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
-             unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
-             unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
-        int             ret             = 0;
-        int             nob             = 0;
-        int             len             = 0;
-        kmx_ctx_t       *rx             = private;
-        kmx_msg_t       *rxmsg          = rx->mxc_msg;
-        lnet_nid_t       nid            = rx->mxc_nid;
-        kmx_ctx_t       *tx             = NULL;
-        kmx_msg_t       *txmsg          = NULL;
-        kmx_peer_t      *peer           = rx->mxc_peer;
-        kmx_conn_t      *conn           = peer->mxp_conn;
-        u64              cookie         = 0ULL;
-        int              msg_type       = rxmsg->mxm_type;
-        int              repost         = 1;
-        int              credit         = 0;
-        int              finalize       = 0;
-
-        LASSERT (mlen <= rlen);
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-        LASSERT (peer && conn);
-
-        /* conn_addref(conn) already taken for the primary rx */
-
-        switch (msg_type) {
-        case MXLND_MSG_EAGER:
-                nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[rlen]);
-                len = rx->mxc_status.xfer_length;
-                if (unlikely(nob > len)) {
-                        CNETERR("Eager message from %s too big: %d(%d)\n",
-                                libcfs_nid2str(nid), nob, len);
-                        ret = -EPROTO;
-                        break;
-                }
-
-                if (kiov != NULL)
-                        lnet_copy_flat2kiov(niov, kiov, offset,
-                                MXLND_MSG_SIZE, rxmsg,
-                                offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
-                                mlen);
-                else
-                        lnet_copy_flat2iov(niov, iov, offset,
-                                MXLND_MSG_SIZE, rxmsg,
-                                offsetof(kmx_msg_t, mxm_u.eager.mxem_payload),
-                                mlen);
-                finalize = 1;
-                credit = 1;
-                break;
-
-        case MXLND_MSG_PUT_REQ:
-                /* we are going to reuse the rx, store the needed info */
-                cookie = rxmsg->mxm_u.put_req.mxprm_cookie;
-
-                /* get tx, post rx, send PUT_ACK */
-
-                tx = mxlnd_get_idle_tx();
-                if (unlikely(tx == NULL)) {
-                        CNETERR("Can't allocate tx for %s\n", libcfs_nid2str(nid));
-                        /* Not replying will break the connection */
-                        ret = -ENOMEM;
-                        break;
-                }
-                if (unlikely(mlen == 0)) {
-                        finalize = 1;
-                        tx->mxc_peer = peer;
-                        tx->mxc_conn = conn;
-                        mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, 0, cookie);
-                        /* repost = 1 */
-                        break;
-                }
-
-                mxlnd_init_tx_msg(tx, MXLND_MSG_PUT_ACK, sizeof(kmx_putack_msg_t), nid);
-                tx->mxc_peer = peer;
-                tx->mxc_conn = conn;
-                /* no need to lock peer first since we already have a ref */
-                mxlnd_conn_addref(conn); /* for the tx */
-                txmsg = tx->mxc_msg;
-                txmsg->mxm_u.put_ack.mxpam_src_cookie = cookie;
-                txmsg->mxm_u.put_ack.mxpam_dst_cookie = tx->mxc_cookie;
-                tx->mxc_cookie = cookie;
-                tx->mxc_match = mxlnd_create_match(tx, 0);
-
-                /* we must post a receive _before_ sending the PUT_ACK */
-                mxlnd_ctx_init(rx);
-                rx->mxc_state = MXLND_CTX_PREP;
-                rx->mxc_peer = peer;
-                rx->mxc_conn = conn;
-                /* do not take another ref for this rx, it is already taken */
-                rx->mxc_nid = peer->mxp_nid;
-                ret = mxlnd_recv_data(ni, lntmsg, rx, MXLND_MSG_PUT_DATA,
-                                      txmsg->mxm_u.put_ack.mxpam_dst_cookie);
-
-                if (unlikely(ret != 0)) {
-                        /* Notify peer that it's over */
-                        CNETERR("Can't setup PUT_DATA rx for %s: %d\n",
-                                libcfs_nid2str(nid), ret);
-                        mxlnd_ctx_init(tx);
-                        tx->mxc_state = MXLND_CTX_PREP;
-                        tx->mxc_peer = peer;
-                        tx->mxc_conn = conn;
-                        /* finalize = 0, let the PUT_ACK tx finalize this */
-                        tx->mxc_lntmsg[0] = rx->mxc_lntmsg[0];
-                        tx->mxc_lntmsg[1] = rx->mxc_lntmsg[1];
-                        /* conn ref already taken above */
-                        mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, ret, cookie);
-                        /* repost = 1 */
-                        break;
-                }
-
-                mxlnd_queue_tx(tx);
-                /* do not return a credit until after PUT_DATA returns */
-                repost = 0;
-                break;
-
-        case MXLND_MSG_GET_REQ:
-                cookie = rxmsg->mxm_u.get_req.mxgrm_cookie;
-
-                if (likely(lntmsg != NULL)) {
-                        mxlnd_send_data(ni, lntmsg, rx->mxc_peer, MXLND_MSG_GET_DATA,
-                                        cookie);
-                } else {
-                        /* GET didn't match anything */
-                        /* The initiator has a rx mapped to [k]iov. We cannot send a nak.
-                         * We have to embed the error code in the match bits.
-                         * Send the error in bits 52-59 and the cookie in bits 0-51 */
-                        tx = mxlnd_get_idle_tx();
-                        if (unlikely(tx == NULL)) {
-                                CNETERR("Can't get tx for GET NAK for %s\n",
-                                        libcfs_nid2str(nid));
-                                /* we can't get a tx, notify the peer that the GET failed */
-                                mxlnd_send_message(conn->mxk_epa, MXLND_MSG_GET_DATA,
-                                                   ENODATA, cookie);
-                                ret = -ENOMEM;
-                                break;
-                        }
-                        tx->mxc_msg_type = MXLND_MSG_GET_DATA;
-                        tx->mxc_state = MXLND_CTX_PENDING;
-                        tx->mxc_nid = nid;
-                        tx->mxc_peer = peer;
-                        tx->mxc_conn = conn;
-                        /* no need to lock peer first since we already have a ref */
-                        mxlnd_conn_addref(conn); /* for this tx */
-                        tx->mxc_cookie = cookie;
-                        tx->mxc_match = mxlnd_create_match(tx, ENODATA);
-                        tx->mxc_pin_type = MX_PIN_PHYSICAL;
-                        mxlnd_queue_tx(tx);
-                }
-                /* finalize lntmsg after tx completes */
-                break;
-
-        default:
-                LBUG();
-        }
-
-        if (repost) {
-                /* we received a message, increment peer's outstanding credits */
-               if (credit == 1) {
-                       spin_lock(&conn->mxk_lock);
-                       conn->mxk_outstanding++;
-                       spin_unlock(&conn->mxk_lock);
-               }
-                /* we are done with the rx */
-                mxlnd_put_idle_rx(rx);
-                mxlnd_conn_decref(conn);
-        }
-
-        if (finalize == 1) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg, 0);
-
-        /* we received a credit, see if we can use it to send a msg */
-        if (credit) mxlnd_check_sends(peer);
-
-        return ret;
-}
-
-void
-mxlnd_sleep(unsigned long timeout)
-{
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule_timeout(timeout);
-       return;
-}
-
-/**
- * mxlnd_tx_queued - the generic send queue thread
- * @arg - thread id (as a void *)
- *
- * This thread moves send messages from the global tx_queue to the owning
- * peer's tx_[msg|data]_queue. If the peer does not exist, it creates one and adds
- * it to the global peer list.
- */
-int
-mxlnd_tx_queued(void *arg)
-{
-        long                    id      = (long) arg;
-        int                     ret     = 0;
-        int                     found   = 0;
-        kmx_ctx_t              *tx      = NULL;
-        kmx_peer_t             *peer    = NULL;
-        cfs_list_t             *queue   = &kmxlnd_data.kmx_tx_queue;
-       spinlock_t              *tx_q_lock = &kmxlnd_data.kmx_tx_queue_lock;
-       rwlock_t                *g_lock  = &kmxlnd_data.kmx_global_lock;
-
-       while (!(atomic_read(&kmxlnd_data.kmx_shutdown))) {
-               ret = down_interruptible(&kmxlnd_data.kmx_tx_queue_sem);
-               if (atomic_read(&kmxlnd_data.kmx_shutdown))
-                       break;
-               if (ret != 0) /* Should we check for -EINTR? */
-                       continue;
-               spin_lock(tx_q_lock);
-               if (cfs_list_empty(&kmxlnd_data.kmx_tx_queue)) {
-                       spin_unlock(tx_q_lock);
-                       continue;
-               }
-               tx = cfs_list_entry(queue->next, kmx_ctx_t, mxc_list);
-               cfs_list_del_init(&tx->mxc_list);
-               spin_unlock(tx_q_lock);
-
-               found = 0;
-               peer = mxlnd_find_peer_by_nid(tx->mxc_nid, 0); /* adds ref*/
-               if (peer != NULL) {
-                       tx->mxc_peer = peer;
-                       write_lock(g_lock);
-                       if (peer->mxp_conn == NULL) {
-                               ret = mxlnd_conn_alloc_locked(&peer->mxp_conn,
-                                                             peer);
-                               if (ret != 0) {
-                                       /* out of memory: give up, fail tx */
-                                       tx->mxc_errno = -ENOMEM;
-                                       mxlnd_peer_decref(peer);
-                                       write_unlock(g_lock);
-                                       mxlnd_put_idle_tx(tx);
-                                       continue;
-                               }
-                       }
-                       tx->mxc_conn = peer->mxp_conn;
-                       mxlnd_conn_addref(tx->mxc_conn); /* for this tx */
-                       mxlnd_peer_decref(peer); /* drop peer ref taken above */
-                       write_unlock(g_lock);
-                        mxlnd_queue_tx(tx);
-                        found = 1;
-                }
-                if (found == 0) {
-                        int             hash    = 0;
-                        kmx_peer_t     *peer    = NULL;
-                        kmx_peer_t     *old     = NULL;
-
-                        hash = mxlnd_nid_to_hash(tx->mxc_nid);
-
-                        LASSERT(tx->mxc_msg_type != MXLND_MSG_PUT_DATA &&
-                                tx->mxc_msg_type != MXLND_MSG_GET_DATA);
-                        /* create peer */
-                        /* adds conn ref for this function */
-                        ret = mxlnd_peer_alloc(&peer, tx->mxc_nid,
-                                        *kmxlnd_tunables.kmx_board,
-                                        *kmxlnd_tunables.kmx_ep_id, 0ULL);
-                        if (ret != 0) {
-                                /* finalize message */
-                                tx->mxc_errno = ret;
-                                mxlnd_put_idle_tx(tx);
-                                continue;
-                        }
-                        tx->mxc_peer = peer;
-                        tx->mxc_conn = peer->mxp_conn;
-                        /* this tx will keep the conn ref taken in peer_alloc() */
-
-                        /* add peer to global peer list, but look to see
-                         * if someone already created it after we released
-                         * the read lock */
-                       write_lock(g_lock);
-                        old = mxlnd_find_peer_by_nid_locked(peer->mxp_nid);
-                        if (old) {
-                                /* we have a peer ref on old */
-                                if (old->mxp_conn) {
-                                        found = 1;
-                                } else {
-                                        /* no conn */
-                                        /* drop our ref taken above... */
-                                        mxlnd_peer_decref(old);
-                                        /* and delete it */
-                                        mxlnd_del_peer_locked(old);
-                                }
-                        }
-
-                       if (found == 0) {
-                               cfs_list_add_tail(&peer->mxp_list,
-                                                 &kmxlnd_data.kmx_peers[hash]);
-                               atomic_inc(&kmxlnd_data.kmx_npeers);
-                       } else {
-                                tx->mxc_peer = old;
-                                tx->mxc_conn = old->mxp_conn;
-                                LASSERT(old->mxp_conn != NULL);
-                                mxlnd_conn_addref(old->mxp_conn);
-                                mxlnd_conn_decref(peer->mxp_conn); /* drop ref taken above.. */
-                                mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref */
-                                mxlnd_peer_decref(peer);
-                        }
-                       write_unlock(g_lock);
-
-                        mxlnd_queue_tx(tx);
-                }
-        }
-        mxlnd_thread_stop(id);
-        return 0;
-}
-
-/* When calling this, we must not have the peer lock. */
-void
-mxlnd_iconnect(kmx_peer_t *peer, u8 msg_type)
-{
-        mx_return_t     mxret           = MX_SUCCESS;
-        mx_request_t    request;
-        kmx_conn_t      *conn           = peer->mxp_conn;
-        u64             match           = ((u64) msg_type) << MXLND_MSG_OFFSET;
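-        /* Note added for clarity: the message type is packed into the upper
-         * bits of the 64-bit MX match value (shifted by MXLND_MSG_OFFSET);
-         * the completion thread later recovers it with
-         * MXLND_MSG_TYPE(status.match_info) to tell ICON_REQ/ICON_ACK
-         * completions apart from regular tx/rx completions. */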
-
-        /* NOTE the caller holds a conn ref every time this function is called,
-         * so we do not need to lock the peer before taking another ref */
-        mxlnd_conn_addref(conn); /* hold until CONN_REQ or CONN_ACK completes */
-
-        LASSERT(msg_type == MXLND_MSG_ICON_REQ || msg_type == MXLND_MSG_ICON_ACK);
-
-        if (peer->mxp_reconnect_time == 0) {
-                peer->mxp_reconnect_time = jiffies;
-        }
-
-        if (peer->mxp_nic_id == 0ULL) {
-                int     ret     = 0;
-
-                ret = mxlnd_ip2nic_id(LNET_NIDADDR(peer->mxp_nid),
-                                      &peer->mxp_nic_id, MXLND_LOOKUP_COUNT);
-                if (ret == 0) {
-                        mx_nic_id_to_board_number(peer->mxp_nic_id, &peer->mxp_board);
-                }
-                if (peer->mxp_nic_id == 0ULL && conn->mxk_status == MXLND_CONN_WAIT) {
-                       /* not mapped yet; drop the status back to INIT */
-                       spin_lock(&conn->mxk_lock);
-                       mxlnd_set_conn_status(conn, MXLND_CONN_INIT);
-                       spin_unlock(&conn->mxk_lock);
-                }
-        }
-
-        if (cfs_time_after(jiffies,
-                           peer->mxp_reconnect_time + MXLND_CONNECT_TIMEOUT) &&
-            conn->mxk_status != MXLND_CONN_DISCONNECT) {
-                /* give up and notify LNET */
-                CDEBUG(D_NET, "timeout trying to connect to %s\n",
-                       libcfs_nid2str(peer->mxp_nid));
-                mxlnd_conn_disconnect(conn, 0, 0);
-                mxlnd_conn_decref(conn);
-                return;
-        }
-
-        mxret = mx_iconnect(kmxlnd_data.kmx_endpt, peer->mxp_nic_id,
-                            peer->mxp_ep_id, MXLND_MSG_MAGIC, match,
-                            (void *) peer, &request);
-        if (unlikely(mxret != MX_SUCCESS)) {
-               spin_lock(&conn->mxk_lock);
-               mxlnd_set_conn_status(conn, MXLND_CONN_FAIL);
-               spin_unlock(&conn->mxk_lock);
-                CNETERR("mx_iconnect() failed with %s (%d) to %s\n",
-                       mx_strerror(mxret), mxret, libcfs_nid2str(peer->mxp_nid));
-                mxlnd_conn_decref(conn);
-        }
-       mx_set_request_timeout(kmxlnd_data.kmx_endpt, request,
-                              jiffies_to_msecs(MXLND_CONNECT_TIMEOUT));
-       return;
-}
-
-#define MXLND_STATS 0
-
-int
-mxlnd_check_sends(kmx_peer_t *peer)
-{
-        int             ret             = 0;
-        int             found           = 0;
-        mx_return_t     mxret           = MX_SUCCESS;
-        kmx_ctx_t       *tx             = NULL;
-        kmx_conn_t      *conn           = NULL;
-        u8              msg_type        = 0;
-        int             credit          = 0;
-        int             status          = 0;
-        int             ntx_posted      = 0;
-        int             credits         = 0;
-#if MXLND_STATS
-        static unsigned long last       = 0;
-#endif
-
-        if (unlikely(peer == NULL)) {
-                LASSERT(peer != NULL);
-                return -1;
-        }
-       write_lock(&kmxlnd_data.kmx_global_lock);
-       conn = peer->mxp_conn;
-       /* NOTE take a ref for the duration of this function since it is
-        * called when there might not be any queued txs for this peer */
-       if (conn) {
-               if (conn->mxk_status == MXLND_CONN_DISCONNECT) {
-                       write_unlock(&kmxlnd_data.kmx_global_lock);
-                       return -1;
-               }
-               mxlnd_conn_addref(conn); /* for duration of this function */
-       }
-       write_unlock(&kmxlnd_data.kmx_global_lock);
-
-        /* do not add another ref for this tx */
-
-        if (conn == NULL) {
-                /* we do not have any conns */
-                CNETERR("peer %s has no conn\n", libcfs_nid2str(peer->mxp_nid));
-                return -1;
-        }
-
-#if MXLND_STATS
-       if (cfs_time_after(jiffies, last)) {
-               last = jiffies + msecs_to_jiffies(MSEC_PER_SEC);
-               CDEBUG(D_NET, "status= %s credits= %d outstanding= %d ntx_msgs= %d "
-                             "ntx_posted= %d ntx_data= %d data_posted= %d\n",
-                             mxlnd_connstatus_to_str(conn->mxk_status), conn->mxk_credits,
-                             conn->mxk_outstanding, conn->mxk_ntx_msgs, conn->mxk_ntx_posted,
-                             conn->mxk_ntx_data, conn->mxk_data_posted);
-       }
-#endif
-
-       spin_lock(&conn->mxk_lock);
-        ntx_posted = conn->mxk_ntx_posted;
-        credits = conn->mxk_credits;
-
-        LASSERT(ntx_posted <= *kmxlnd_tunables.kmx_peercredits);
-        LASSERT(ntx_posted >= 0);
-
-        LASSERT(credits <= *kmxlnd_tunables.kmx_peercredits);
-        LASSERT(credits >= 0);
-
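-        /* Credit accounting (note added for clarity): mxk_credits is how many
-         * more sends we may post to this peer and mxk_outstanding is how many
-         * credits we owe back.  Credits are normally returned in the
-         * mxm_credits field of outgoing messages; if we owe at least
-         * MXLND_CREDIT_HIGHWATER() and nothing queued could carry them back,
-         * a NOOP is sent purely to return credits. */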
-        /* check number of queued msgs, ignore data */
-        if (conn->mxk_outstanding >= MXLND_CREDIT_HIGHWATER()) {
-                /* check if any txs queued that could return credits... */
-                if (cfs_list_empty(&conn->mxk_tx_credit_queue) ||
-                    conn->mxk_ntx_msgs == 0) {
-                        /* if not, send a NOOP */
-                        tx = mxlnd_get_idle_tx();
-                        if (likely(tx != NULL)) {
-                                tx->mxc_peer = peer;
-                                tx->mxc_conn = peer->mxp_conn;
-                                mxlnd_conn_addref(conn); /* for this tx */
-                                mxlnd_init_tx_msg (tx, MXLND_MSG_NOOP, 0, peer->mxp_nid);
-                                tx->mxc_match = mxlnd_create_match(tx, 0);
-                                mxlnd_peer_queue_tx_locked(tx);
-                                found = 1;
-                                goto done_locked;
-                        }
-                }
-        }
-
-        /* if the peer is not ready, try to connect */
-        if (unlikely(conn->mxk_status == MXLND_CONN_INIT ||
-            conn->mxk_status == MXLND_CONN_FAIL)) {
-                CDEBUG(D_NET, "status=%s\n", mxlnd_connstatus_to_str(conn->mxk_status));
-                mxlnd_set_conn_status(conn, MXLND_CONN_WAIT);
-               spin_unlock(&conn->mxk_lock);
-                mxlnd_iconnect(peer, (u8) MXLND_MSG_ICON_REQ);
-                goto done;
-        }
-
-        while (!cfs_list_empty(&conn->mxk_tx_free_queue) ||
-               !cfs_list_empty(&conn->mxk_tx_credit_queue)) {
-                /* We have something to send. If there is a queued tx that does
-                 * not require a credit (the "free" queue), choose it first:
-                 * completing it either returns a credit (here or at the peer)
-                 * or finishes a DATA, CONN_REQ or CONN_ACK. */
-                cfs_list_t *tmp_tx = NULL;
-                if (!cfs_list_empty(&conn->mxk_tx_free_queue)) {
-                        tmp_tx = &conn->mxk_tx_free_queue;
-                } else {
-                        tmp_tx = &conn->mxk_tx_credit_queue;
-                }
-                tx = cfs_list_entry(tmp_tx->next, kmx_ctx_t, mxc_list);
-
-                msg_type = tx->mxc_msg_type;
-
-                /* don't try to send a rx */
-                LASSERT(tx->mxc_type == MXLND_REQ_TX);
-
-                /* ensure that it is a valid msg type */
-                LASSERT(msg_type == MXLND_MSG_CONN_REQ ||
-                        msg_type == MXLND_MSG_CONN_ACK ||
-                        msg_type == MXLND_MSG_NOOP     ||
-                        msg_type == MXLND_MSG_EAGER    ||
-                        msg_type == MXLND_MSG_PUT_REQ  ||
-                        msg_type == MXLND_MSG_PUT_ACK  ||
-                        msg_type == MXLND_MSG_PUT_DATA ||
-                        msg_type == MXLND_MSG_GET_REQ  ||
-                        msg_type == MXLND_MSG_GET_DATA);
-                LASSERT(tx->mxc_peer == peer);
-                LASSERT(tx->mxc_nid == peer->mxp_nid);
-
-                credit = mxlnd_tx_requires_credit(tx);
-                if (credit) {
-
-                        if (conn->mxk_ntx_posted == *kmxlnd_tunables.kmx_peercredits) {
-                                CDEBUG(D_NET, "%s: posted enough\n",
-                                              libcfs_nid2str(peer->mxp_nid));
-                                goto done_locked;
-                        }
-
-                        if (conn->mxk_credits == 0) {
-                                CDEBUG(D_NET, "%s: no credits\n",
-                                              libcfs_nid2str(peer->mxp_nid));
-                                goto done_locked;
-                        }
-
-                        if (conn->mxk_credits == 1 &&      /* last credit reserved for */
-                            conn->mxk_outstanding == 0) {  /* giving back credits */
-                                CDEBUG(D_NET, "%s: not using last credit\n",
-                                              libcfs_nid2str(peer->mxp_nid));
-                                goto done_locked;
-                        }
-                }
-
-                if (unlikely(conn->mxk_status != MXLND_CONN_READY)) {
-                        if ( ! (msg_type == MXLND_MSG_CONN_REQ ||
-                                msg_type == MXLND_MSG_CONN_ACK)) {
-                                CDEBUG(D_NET, "peer status is %s for tx 0x%llx (%s)\n",
-                                             mxlnd_connstatus_to_str(conn->mxk_status),
-                                             tx->mxc_cookie,
-                                             mxlnd_msgtype_to_str(tx->mxc_msg_type));
-                                if (conn->mxk_status == MXLND_CONN_DISCONNECT ||
-                                    cfs_time_aftereq(jiffies, tx->mxc_deadline)) {
-                                        cfs_list_del_init(&tx->mxc_list);
-                                        tx->mxc_errno = -ECONNABORTED;
-                                       spin_unlock(&conn->mxk_lock);
-                                        mxlnd_put_idle_tx(tx);
-                                        mxlnd_conn_decref(conn);
-                                        goto done;
-                                }
-                                goto done_locked;
-                        }
-                }
-
-                cfs_list_del_init(&tx->mxc_list);
-
-                /* handle credits, etc now while we have the lock to avoid races */
-                if (credit) {
-                        conn->mxk_credits--;
-                        conn->mxk_ntx_posted++;
-                }
-                if (msg_type != MXLND_MSG_PUT_DATA &&
-                    msg_type != MXLND_MSG_GET_DATA) {
-                        if (msg_type != MXLND_MSG_CONN_REQ &&
-                            msg_type != MXLND_MSG_CONN_ACK) {
-                                conn->mxk_ntx_msgs--;
-                        }
-                }
-                if (tx->mxc_incarnation == 0 &&
-                    conn->mxk_incarnation != 0) {
-                        tx->mxc_incarnation = conn->mxk_incarnation;
-                }
-
-                /* if this is a NOOP and (1) mxp_conn->mxk_outstanding < CREDIT_HIGHWATER
-                 * or (2) there is a non-DATA msg that can return credits in the
-                 * queue, then drop this duplicate NOOP */
-                if (unlikely(msg_type == MXLND_MSG_NOOP)) {
-                        if ((conn->mxk_outstanding < MXLND_CREDIT_HIGHWATER()) ||
-                            (conn->mxk_ntx_msgs >= 1)) {
-                                conn->mxk_credits++;
-                                conn->mxk_ntx_posted--;
-                               spin_unlock(&conn->mxk_lock);
-                                /* redundant NOOP */
-                                mxlnd_put_idle_tx(tx);
-                                mxlnd_conn_decref(conn);
-                                CDEBUG(D_NET, "%s: redundant noop\n",
-                                              libcfs_nid2str(peer->mxp_nid));
-                                found = 1;
-                                goto done;
-                        }
-                }
-
-                found = 1;
-                if (likely((msg_type != MXLND_MSG_PUT_DATA) &&
-                    (msg_type != MXLND_MSG_GET_DATA))) {
-                        mxlnd_pack_msg_locked(tx);
-                }
-
-                mxret = MX_SUCCESS;
-
-                status = conn->mxk_status;
-               spin_unlock(&conn->mxk_lock);
-
-                if (likely((status == MXLND_CONN_READY) ||
-                    (msg_type == MXLND_MSG_CONN_REQ) ||
-                    (msg_type == MXLND_MSG_CONN_ACK))) {
-                        ret = 0;
-                        if (msg_type != MXLND_MSG_CONN_REQ &&
-                            msg_type != MXLND_MSG_CONN_ACK) {
-                                /* add to the pending list */
-                                ret = mxlnd_q_pending_ctx(tx);
-                        } else {
-                                /* CONN_REQ/ACK */
-                                tx->mxc_state = MXLND_CTX_PENDING;
-                        }
-
-                        if (ret == 0) {
-                                if (likely(msg_type != MXLND_MSG_PUT_DATA &&
-                                    msg_type != MXLND_MSG_GET_DATA)) {
-                                        /* send a msg style tx */
-                                        LASSERT(tx->mxc_nseg == 1);
-                                        LASSERT(tx->mxc_pin_type == MX_PIN_PHYSICAL);
-                                        CDEBUG(D_NET, "sending %s 0x%llx\n",
-                                               mxlnd_msgtype_to_str(msg_type),
-                                               tx->mxc_cookie);
-                                        mxret = mx_kisend(kmxlnd_data.kmx_endpt,
-                                                          &tx->mxc_seg,
-                                                          tx->mxc_nseg,
-                                                          tx->mxc_pin_type,
-                                                          conn->mxk_epa,
-                                                          tx->mxc_match,
-                                                          (void *) tx,
-                                                          &tx->mxc_mxreq);
-                                } else {
-                                        /* send a DATA tx */
-                                       spin_lock(&conn->mxk_lock);
-                                       conn->mxk_ntx_data--;
-                                       conn->mxk_data_posted++;
-                                       spin_unlock(&conn->mxk_lock);
-                                        CDEBUG(D_NET, "sending %s 0x%llx\n",
-                                               mxlnd_msgtype_to_str(msg_type),
-                                               tx->mxc_cookie);
-                                        mxret = mx_kisend(kmxlnd_data.kmx_endpt,
-                                                          tx->mxc_seg_list,
-                                                          tx->mxc_nseg,
-                                                          tx->mxc_pin_type,
-                                                          conn->mxk_epa,
-                                                          tx->mxc_match,
-                                                          (void *) tx,
-                                                          &tx->mxc_mxreq);
-                                }
-                        } else {
-                                /* ret != 0 */
-                                mxret = MX_CONNECTION_FAILED;
-                        }
-                        if (likely(mxret == MX_SUCCESS)) {
-                                ret = 0;
-                        } else {
-                                CNETERR("mx_kisend() failed with %s (%d) "
-                                        "sending to %s\n", mx_strerror(mxret), (int) mxret,
-                                       libcfs_nid2str(peer->mxp_nid));
-                                /* NOTE mx_kisend() only fails if there are not enough
-                                * resources. Do not change the connection status. */
-                                if (mxret == MX_NO_RESOURCES) {
-                                        tx->mxc_errno = -ENOMEM;
-                                } else {
-                                        tx->mxc_errno = -ECONNABORTED;
-                                }
-                                if (credit) {
-                                       spin_lock(&conn->mxk_lock);
-                                       conn->mxk_ntx_posted--;
-                                       conn->mxk_credits++;
-                                       spin_unlock(&conn->mxk_lock);
-                               } else if (msg_type == MXLND_MSG_PUT_DATA ||
-                                          msg_type == MXLND_MSG_GET_DATA) {
-                                       spin_lock(&conn->mxk_lock);
-                                       conn->mxk_data_posted--;
-                                       spin_unlock(&conn->mxk_lock);
-                               }
-                               if (msg_type != MXLND_MSG_PUT_DATA &&
-                                   msg_type != MXLND_MSG_GET_DATA &&
-                                   msg_type != MXLND_MSG_CONN_REQ &&
-                                   msg_type != MXLND_MSG_CONN_ACK) {
-                                       spin_lock(&conn->mxk_lock);
-                                       conn->mxk_outstanding +=
-                                               tx->mxc_msg->mxm_credits;
-                                       spin_unlock(&conn->mxk_lock);
-                                }
-                                if (msg_type != MXLND_MSG_CONN_REQ &&
-                                    msg_type != MXLND_MSG_CONN_ACK) {
-                                        /* remove from the pending list */
-                                        mxlnd_deq_pending_ctx(tx);
-                                }
-                                mxlnd_put_idle_tx(tx);
-                                mxlnd_conn_decref(conn);
-                        }
-                }
-               spin_lock(&conn->mxk_lock);
-       }
-done_locked:
-       spin_unlock(&conn->mxk_lock);
-done:
-       mxlnd_conn_decref(conn); /* drop ref taken at start of function */
-       return found;
-}
-
-
-/**
- * mxlnd_handle_tx_completion - a tx completed, progress or complete the msg
- * @ctx - the tx descriptor
- *
- * Determine which type of send request completed and start the next step if
- * needed, or, if done, signal completion to LNET. Afterwards, return the tx
- * to the idle list.
- */
-void
-mxlnd_handle_tx_completion(kmx_ctx_t *tx)
-{
-        int             code    = tx->mxc_status.code;
-        int             failed  = (code != MX_STATUS_SUCCESS || tx->mxc_errno != 0);
-        kmx_msg_t       *msg    = tx->mxc_msg;
-        kmx_peer_t      *peer   = tx->mxc_peer;
-        kmx_conn_t      *conn   = tx->mxc_conn;
-        u8              type    = tx->mxc_msg_type;
-        int             credit  = mxlnd_tx_requires_credit(tx);
-        u64             cookie  = tx->mxc_cookie;
-
-        CDEBUG(D_NET, "entering %s (0x%llx):\n",
-                      mxlnd_msgtype_to_str(tx->mxc_msg_type), cookie);
-
-        LASSERT (peer != NULL);
-        LASSERT (conn != NULL);
-
-        if (type != MXLND_MSG_PUT_DATA && type != MXLND_MSG_GET_DATA) {
-                LASSERT (type == msg->mxm_type);
-        }
-
-        if (failed) {
-                if (tx->mxc_errno == 0) tx->mxc_errno = -EIO;
-        } else {
-               spin_lock(&conn->mxk_lock);
-               conn->mxk_last_tx = cfs_time_current(); /* jiffies */
-               spin_unlock(&conn->mxk_lock);
-       }
-
-       switch (type) {
-
-       case MXLND_MSG_GET_DATA:
-               spin_lock(&conn->mxk_lock);
-               if (conn->mxk_incarnation == tx->mxc_incarnation) {
-                       conn->mxk_outstanding++;
-                       conn->mxk_data_posted--;
-               }
-               spin_unlock(&conn->mxk_lock);
-               break;
-
-       case MXLND_MSG_PUT_DATA:
-               spin_lock(&conn->mxk_lock);
-               if (conn->mxk_incarnation == tx->mxc_incarnation) {
-                       conn->mxk_data_posted--;
-               }
-               spin_unlock(&conn->mxk_lock);
-               break;
-
-        case MXLND_MSG_NOOP:
-        case MXLND_MSG_PUT_REQ:
-        case MXLND_MSG_PUT_ACK:
-        case MXLND_MSG_GET_REQ:
-        case MXLND_MSG_EAGER:
-                break;
-
-        case MXLND_MSG_CONN_ACK:
-                if (peer->mxp_incompatible) {
-                        /* we sent our params, now close this conn */
-                        mxlnd_conn_disconnect(conn, 0, 1);
-                }
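-                /* fall through: CONN_ACK shares the failure handling below */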
-        case MXLND_MSG_CONN_REQ:
-                if (failed) {
-                        CNETERR("%s failed with %s (%d) (errno = %d) to %s\n",
-                               type == MXLND_MSG_CONN_REQ ? "CONN_REQ" : "CONN_ACK",
-                               mx_strstatus(code), code, tx->mxc_errno,
-                               libcfs_nid2str(tx->mxc_nid));
-                        if (!peer->mxp_incompatible) {
-                               spin_lock(&conn->mxk_lock);
-                               if (code == MX_STATUS_BAD_SESSION)
-                                       mxlnd_set_conn_status(conn,
-                                                             MXLND_CONN_INIT);
-                               else
-                                       mxlnd_set_conn_status(conn,
-                                                             MXLND_CONN_FAIL);
-                               spin_unlock(&conn->mxk_lock);
-                        }
-                }
-                break;
-
-        default:
-                CNETERR("Unknown msg type of %d\n", type);
-                LBUG();
-        }
-
-        if (credit) {
-               spin_lock(&conn->mxk_lock);
-               if (conn->mxk_incarnation == tx->mxc_incarnation) {
-                       conn->mxk_ntx_posted--;
-               }
-               spin_unlock(&conn->mxk_lock);
-        }
-
-        mxlnd_put_idle_tx(tx);
-        mxlnd_conn_decref(conn);
-
-        mxlnd_check_sends(peer);
-
-        CDEBUG(D_NET, "leaving\n");
-        return;
-}
-
-/* Handle completion of MSG or DATA rx.
- * CONN_REQ and CONN_ACK are handled elsewhere. */
-void
-mxlnd_handle_rx_completion(kmx_ctx_t *rx)
-{
-        int             ret             = 0;
-        int             repost          = 1;
-        int             credit          = 1;
-        u32             nob             = rx->mxc_status.xfer_length;
-        u64             bits            = rx->mxc_status.match_info;
-        kmx_msg_t      *msg             = rx->mxc_msg;
-        kmx_peer_t     *peer            = rx->mxc_peer;
-        kmx_conn_t     *conn            = rx->mxc_conn;
-        u8              type            = rx->mxc_msg_type;
-        u64             seq             = bits;
-        lnet_msg_t     *lntmsg[2];
-        int             result          = 0;
-        int             peer_ref        = 0;
-        int             conn_ref        = 0;
-
-        /* NOTE We may only know the peer's nid if this is a PUT_REQ, a GET_REQ
-         * or a failed GET reply */
-
-        /* NOTE peer may still be NULL if it is a new peer and
-         *      conn may be NULL if this is a re-connect */
-        if (likely(peer != NULL && conn != NULL)) {
-                /* we have a reference on the conn */
-                conn_ref = 1;
-        } else if (peer != NULL && conn == NULL) {
-                /* we have a reference on the peer */
-                peer_ref = 1;
-        } else if (peer == NULL && conn != NULL) {
-                /* fatal error */
-                CERROR("rx 0x%llx from %s has conn but no peer\n",
-                       bits, libcfs_nid2str(rx->mxc_nid));
-                LBUG();
-        } /* else peer and conn == NULL */
-
-        if (conn == NULL && peer != NULL) {
-               write_lock(&kmxlnd_data.kmx_global_lock);
-                conn = peer->mxp_conn;
-                if (conn) {
-                        mxlnd_conn_addref(conn); /* conn takes ref... */
-                        mxlnd_peer_decref(peer); /* from peer */
-                        conn_ref = 1;
-                        peer_ref = 0;
-                }
-               write_unlock(&kmxlnd_data.kmx_global_lock);
-                rx->mxc_conn = conn;
-        }
-
-#if MXLND_DEBUG
-        CDEBUG(D_NET, "receiving msg bits=0x%llx nob=%d peer=0x%p\n", bits, nob, peer);
-#endif
-
-        lntmsg[0] = NULL;
-        lntmsg[1] = NULL;
-
-        if (rx->mxc_status.code != MX_STATUS_SUCCESS &&
-            rx->mxc_status.code != MX_STATUS_TRUNCATED) {
-                CNETERR("rx from %s failed with %s (%d)\n",
-                        libcfs_nid2str(rx->mxc_nid),
-                        mx_strstatus(rx->mxc_status.code),
-                        rx->mxc_status.code);
-                credit = 0;
-                goto cleanup;
-        }
-
-        if (nob == 0) {
-                /* this may be a failed GET reply */
-                if (type == MXLND_MSG_GET_DATA) {
-                        /* get the error (52-59) bits from the match bits */
-                        ret = (u32) MXLND_ERROR_VAL(rx->mxc_status.match_info);
-                        lntmsg[0] = rx->mxc_lntmsg[0];
-                        result = -ret;
-                        goto cleanup;
-                } else {
-                        /* we had a rx complete with 0 bytes (no hdr, nothing) */
-                        CNETERR("rx from %s returned with 0 bytes\n",
-                                libcfs_nid2str(rx->mxc_nid));
-                        goto cleanup;
-                }
-        }
-
-        /* NOTE PUT_DATA and GET_DATA do not have mxc_msg, do not call unpack() */
-        if (type == MXLND_MSG_PUT_DATA) {
-                /* result = 0; */
-                lntmsg[0] = rx->mxc_lntmsg[0];
-                goto cleanup;
-        } else if (type == MXLND_MSG_GET_DATA) {
-                /* result = 0; */
-                lntmsg[0] = rx->mxc_lntmsg[0];
-                lntmsg[1] = rx->mxc_lntmsg[1];
-                goto cleanup;
-        }
-
-        ret = mxlnd_unpack_msg(msg, nob);
-        if (ret != 0) {
-                CNETERR("Error %d unpacking rx from %s\n",
-                        ret, libcfs_nid2str(rx->mxc_nid));
-                goto cleanup;
-        }
-        rx->mxc_nob = nob;
-        type = msg->mxm_type;
-
-        if (rx->mxc_nid != msg->mxm_srcnid ||
-            kmxlnd_data.kmx_ni->ni_nid != msg->mxm_dstnid) {
-                CNETERR("rx with mismatched NID (type %s) (my nid is "
-                       "0x%llx and rx msg dst is 0x%llx)\n",
-                       mxlnd_msgtype_to_str(type), kmxlnd_data.kmx_ni->ni_nid,
-                       msg->mxm_dstnid);
-                goto cleanup;
-        }
-
-        if ((conn != NULL && msg->mxm_srcstamp != conn->mxk_incarnation) ||
-            msg->mxm_dststamp != kmxlnd_data.kmx_incarnation) {
-                CNETERR("Stale rx from %s with type %s "
-                       "(mxm_srcstamp (%lld) != mxk_incarnation (%lld) "
-                       "|| mxm_dststamp (%lld) != kmx_incarnation (%lld))\n",
-                       libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type),
-                       msg->mxm_srcstamp, conn->mxk_incarnation,
-                       msg->mxm_dststamp, kmxlnd_data.kmx_incarnation);
-                credit = 0;
-                goto cleanup;
-        }
-
-        CDEBUG(D_NET, "Received %s with %d credits\n",
-                      mxlnd_msgtype_to_str(type), msg->mxm_credits);
-
-        LASSERT(peer != NULL && conn != NULL);
-        if (msg->mxm_credits != 0) {
-               spin_lock(&conn->mxk_lock);
-                if (msg->mxm_srcstamp == conn->mxk_incarnation) {
-                        if ((conn->mxk_credits + msg->mxm_credits) >
-                             *kmxlnd_tunables.kmx_peercredits) {
-                                CNETERR("mxk_credits %d  mxm_credits %d\n",
-                                        conn->mxk_credits, msg->mxm_credits);
-                        }
-                        conn->mxk_credits += msg->mxm_credits;
-                        LASSERT(conn->mxk_credits >= 0);
-                        LASSERT(conn->mxk_credits <= *kmxlnd_tunables.kmx_peercredits);
-                }
-               spin_unlock(&conn->mxk_lock);
-        }
-
-        CDEBUG(D_NET, "switch %s for rx (0x%llx)\n", mxlnd_msgtype_to_str(type), seq);
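-        /* Note added for clarity: EAGER, PUT_REQ and GET_REQ rxs are handed to
-         * lnet_parse(); when it accepts the message (ret >= 0) the rx is
-         * presumably reposted later from the receive path, so we only repost
-         * here on error (repost = ret < 0). */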
-        switch (type) {
-        case MXLND_MSG_NOOP:
-                break;
-
-        case MXLND_MSG_EAGER:
-                ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.eager.mxem_hdr,
-                                        msg->mxm_srcnid, rx, 0);
-                repost = ret < 0;
-                break;
-
-        case MXLND_MSG_PUT_REQ:
-                ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.put_req.mxprm_hdr,
-                                        msg->mxm_srcnid, rx, 1);
-                repost = ret < 0;
-                break;
-
-        case MXLND_MSG_PUT_ACK: {
-                u64  cookie = (u64) msg->mxm_u.put_ack.mxpam_dst_cookie;
-                if (cookie > MXLND_MAX_COOKIE) {
-                        CNETERR("NAK for msg_type %d from %s\n", rx->mxc_msg_type,
-                                libcfs_nid2str(rx->mxc_nid));
-                        result = -((u32) MXLND_ERROR_VAL(cookie));
-                        lntmsg[0] = rx->mxc_lntmsg[0];
-                } else {
-                        mxlnd_send_data(kmxlnd_data.kmx_ni, rx->mxc_lntmsg[0],
-                                        rx->mxc_peer, MXLND_MSG_PUT_DATA,
-                                        rx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie);
-                }
-                /* repost == 1 */
-                break;
-        }
-        case MXLND_MSG_GET_REQ:
-                ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.get_req.mxgrm_hdr,
-                                        msg->mxm_srcnid, rx, 1);
-                repost = ret < 0;
-                break;
-
-        default:
-                CNETERR("Bad MXLND message type %x from %s\n", msg->mxm_type,
-                        libcfs_nid2str(rx->mxc_nid));
-                ret = -EPROTO;
-                break;
-        }
-
-        if (ret < 0) {
-                CDEBUG(D_NET, "setting PEER_CONN_FAILED\n");
-               spin_lock(&conn->mxk_lock);
-               mxlnd_set_conn_status(conn, MXLND_CONN_FAIL);
-               spin_unlock(&conn->mxk_lock);
-       }
-
-cleanup:
-       if (conn != NULL) {
-               spin_lock(&conn->mxk_lock);
-               conn->mxk_last_rx = cfs_time_current(); /* jiffies */
-               spin_unlock(&conn->mxk_lock);
-        }
-
-        if (repost) {
-                /* lnet_parse() failed, etc., repost now */
-                mxlnd_put_idle_rx(rx);
-                if (conn != NULL && credit == 1) {
-                        if (type == MXLND_MSG_PUT_DATA ||
-                            type == MXLND_MSG_EAGER ||
-                            type == MXLND_MSG_PUT_REQ ||
-                            type == MXLND_MSG_NOOP) {
-                               spin_lock(&conn->mxk_lock);
-                               conn->mxk_outstanding++;
-                               spin_unlock(&conn->mxk_lock);
-                        }
-                }
-                if (conn_ref) mxlnd_conn_decref(conn);
-                LASSERT(peer_ref == 0);
-        }
-
-        if (type == MXLND_MSG_PUT_DATA || type == MXLND_MSG_GET_DATA) {
-                CDEBUG(D_NET, "leaving for rx (0x%llx)\n", bits);
-        } else {
-                CDEBUG(D_NET, "leaving for rx (0x%llx)\n", seq);
-        }
-
-        if (lntmsg[0] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result);
-        if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result);
-
-        if (conn != NULL && credit == 1) mxlnd_check_sends(peer);
-
-        return;
-}
-
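-/* Completion handler for mx_iconnect() (ICON_REQ/ICON_ACK).  On failure the
- * connection is marked failed and, after the connect timeout, disconnected;
- * on success we record the peer's endpoint address and session id and queue a
- * CONN_REQ or CONN_ACK carrying our queue depth and eager message size.
- * (Summary comment added for clarity.) */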
-void
-mxlnd_handle_connect_msg(kmx_peer_t *peer, u8 msg_type, mx_status_t status)
-{
-        kmx_ctx_t       *tx     = NULL;
-        kmx_msg_t       *txmsg  = NULL;
-        kmx_conn_t      *conn   = peer->mxp_conn;
-        u64             nic_id  = 0ULL;
-        u32             ep_id   = 0;
-        u32             sid     = 0;
-        u8              type    = (msg_type == MXLND_MSG_ICON_REQ ?
-                                   MXLND_MSG_CONN_REQ : MXLND_MSG_CONN_ACK);
-
-        /* a conn ref was taken when calling mx_iconnect(),
-         * hold it until CONN_REQ or CONN_ACK completes */
-
-        CDEBUG(D_NET, "entering\n");
-        if (status.code != MX_STATUS_SUCCESS) {
-                int send_bye    = (msg_type == MXLND_MSG_ICON_REQ ? 0 : 1);
-
-                CNETERR("mx_iconnect() failed for %s with %s (%d) "
-                        "to %s mxp_nid = 0x%llx mxp_nic_id = 0x%0llx mxp_ep_id = %d\n",
-                        mxlnd_msgtype_to_str(msg_type),
-                        mx_strstatus(status.code), status.code,
-                        libcfs_nid2str(peer->mxp_nid),
-                        peer->mxp_nid,
-                        peer->mxp_nic_id,
-                        peer->mxp_ep_id);
-               spin_lock(&conn->mxk_lock);
-               mxlnd_set_conn_status(conn, MXLND_CONN_FAIL);
-               spin_unlock(&conn->mxk_lock);
-
-                if (cfs_time_after(jiffies, peer->mxp_reconnect_time +
-                                   MXLND_CONNECT_TIMEOUT)) {
-                        CNETERR("timeout, calling conn_disconnect()\n");
-                        mxlnd_conn_disconnect(conn, 0, send_bye);
-                }
-
-                mxlnd_conn_decref(conn);
-                return;
-        }
-        mx_decompose_endpoint_addr2(status.source, &nic_id, &ep_id, &sid);
-       write_lock(&kmxlnd_data.kmx_global_lock);
-       spin_lock(&conn->mxk_lock);
-       conn->mxk_epa = status.source;
-       mx_set_endpoint_addr_context(conn->mxk_epa, (void *) conn);
-       if (msg_type == MXLND_MSG_ICON_ACK && likely(!peer->mxp_incompatible)) {
-               mxlnd_set_conn_status(conn, MXLND_CONN_READY);
-       }
-       spin_unlock(&conn->mxk_lock);
-       write_unlock(&kmxlnd_data.kmx_global_lock);
-
-       /* mx_iconnect() succeeded, reset delay to 0 */
-       write_lock(&kmxlnd_data.kmx_global_lock);
-       peer->mxp_reconnect_time = 0;
-       peer->mxp_conn->mxk_sid = sid;
-       write_unlock(&kmxlnd_data.kmx_global_lock);
-
-        /* marshal CONN_REQ or CONN_ACK msg */
-        /* we are still using the conn ref from iconnect() - do not take another */
-        tx = mxlnd_get_idle_tx();
-        if (tx == NULL) {
-                CNETERR("Can't obtain %s tx for %s\n",
-                       mxlnd_msgtype_to_str(type),
-                       libcfs_nid2str(peer->mxp_nid));
-               spin_lock(&conn->mxk_lock);
-               mxlnd_set_conn_status(conn, MXLND_CONN_FAIL);
-               spin_unlock(&conn->mxk_lock);
-                mxlnd_conn_decref(conn);
-                return;
-        }
-
-        tx->mxc_peer = peer;
-        tx->mxc_conn = conn;
-        tx->mxc_deadline = jiffies + MXLND_CONNECT_TIMEOUT;
-        CDEBUG(D_NET, "sending %s\n", mxlnd_msgtype_to_str(type));
-        mxlnd_init_tx_msg (tx, type, sizeof(kmx_connreq_msg_t), peer->mxp_nid);
-        txmsg = tx->mxc_msg;
-        txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_peercredits;
-        txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_MSG_SIZE;
-        tx->mxc_match = mxlnd_create_match(tx, 0);
-
-        mxlnd_queue_tx(tx);
-        return;
-}
-
-/**
- * mxlnd_request_waitd - the MX request completion thread(s)
- * @arg - thread id (as a void *)
- *
- * This thread waits for an MX completion and then completes the request.
- * We will create one thread per CPU.
- */
-int
-mxlnd_request_waitd(void *arg)
-{
-        long                    id              = (long) arg;
-        __u32                   result          = 0;
-        mx_return_t             mxret           = MX_SUCCESS;
-        mx_status_t             status;
-        kmx_ctx_t              *ctx             = NULL;
-        enum kmx_req_state      req_type        = MXLND_REQ_TX;
-        kmx_peer_t             *peer            = NULL;
-        kmx_conn_t             *conn            = NULL;
-#if MXLND_POLLING
-        int                     count           = 0;
-#endif
-
-        memset(&status, 0, sizeof(status));
-
-        CDEBUG(D_NET, "request_waitd %ld starting\n", id);
-
-       while (!(atomic_read(&kmxlnd_data.kmx_shutdown))) {
-               u8      msg_type        = 0;
-
-                mxret = MX_SUCCESS;
-                result = 0;
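-                /* Note added for clarity: with polling enabled, thread 0
-                 * busy-polls with mx_test_any() for up to kmx_polling
-                 * iterations before falling back to a blocking mx_wait_any();
-                 * the other threads always block. */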
-#if MXLND_POLLING
-                if (id == 0 && count++ < *kmxlnd_tunables.kmx_polling) {
-                        mxret = mx_test_any(kmxlnd_data.kmx_endpt, 0ULL, 0ULL,
-                                            &status, &result);
-                } else {
-                        count = 0;
-                        mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT,
-                                            0ULL, 0ULL, &status, &result);
-                }
-#else
-                mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT,
-                                    0ULL, 0ULL, &status, &result);
-#endif
-               if (unlikely(atomic_read(&kmxlnd_data.kmx_shutdown)))
-                       break;
-
-                if (result != 1) {
-                        /* nothing completed... */
-                        continue;
-                }
-
-                CDEBUG(D_NET, "wait_any() returned with %s (%d) with "
-                       "match_info 0x%llx and length %d\n",
-                       mx_strstatus(status.code), status.code,
-                       (u64) status.match_info, status.msg_length);
-
-                if (status.code != MX_STATUS_SUCCESS) {
-                        CNETERR("wait_any() failed with %s (%d) with "
-                                "match_info 0x%llx and length %d\n",
-                                mx_strstatus(status.code), status.code,
-                                (u64) status.match_info, status.msg_length);
-                }
-
-                msg_type = MXLND_MSG_TYPE(status.match_info);
-
-                /* This may be an mx_iconnect() request completing;
-                 * check the match bits for ICON_REQ and ICON_ACK */
-                if (msg_type == MXLND_MSG_ICON_REQ ||
-                    msg_type == MXLND_MSG_ICON_ACK) {
-                        peer = (kmx_peer_t*) status.context;
-                        mxlnd_handle_connect_msg(peer, msg_type, status);
-                        continue;
-                }
-
-                /* This must be a tx or rx */
-
-                /* NOTE: if this is a RX from the unexpected callback, it may
-                 * have very little info. If we dropped it in unexpected_recv(),
-                 * it will not have a context. If so, ignore it. */
-                ctx = (kmx_ctx_t *) status.context;
-                if (ctx != NULL) {
-
-                        req_type = ctx->mxc_type;
-                        conn = ctx->mxc_conn; /* this may be NULL */
-                        mxlnd_deq_pending_ctx(ctx);
-
-                        /* copy status to ctx->mxc_status */
-                        ctx->mxc_status = status;
-
-                        switch (req_type) {
-                        case MXLND_REQ_TX:
-                                mxlnd_handle_tx_completion(ctx);
-                                break;
-                        case MXLND_REQ_RX:
-                                mxlnd_handle_rx_completion(ctx);
-                                break;
-                        default:
-                                CNETERR("Unknown ctx type %d\n", req_type);
-                                LBUG();
-                                break;
-                        }
-
-                        /* conn is always set except for the first CONN_REQ rx
-                         * from a new peer */
-                        if (status.code != MX_STATUS_SUCCESS && conn != NULL) {
-                                mxlnd_conn_disconnect(conn, 1, 1);
-                        }
-                }
-                CDEBUG(D_NET, "waitd() completed task\n");
-        }
-        CDEBUG(D_NET, "request_waitd %ld stopping\n", id);
-        mxlnd_thread_stop(id);
-        return 0;
-}
-
-
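-/* Scan every peer's connection and disconnect any whose pending deadline
- * (mxk_timeout) has already passed.  Returns the earliest upcoming deadline,
- * or now + MXLND_COMM_TIMEOUT if nothing is pending, so the caller knows how
- * long to sleep.  (Summary comment added for clarity.) */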
-unsigned long
-mxlnd_check_timeouts(unsigned long now)
-{
-        int             i               = 0;
-        int             disconnect      = 0;
-        unsigned long   next            = 0; /* jiffies */
-        kmx_peer_t      *peer           = NULL;
-        kmx_conn_t      *conn           = NULL;
-       rwlock_t        *g_lock         = &kmxlnd_data.kmx_global_lock;
-
-       read_lock(g_lock);
-       for (i = 0; i < MXLND_HASH_SIZE; i++) {
-               cfs_list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i],
-                                       mxp_list) {
-
-                       if (unlikely(atomic_read(&kmxlnd_data.kmx_shutdown))) {
-                               read_unlock(g_lock);
-                               return next;
-                       }
-
-                        conn = peer->mxp_conn;
-                        if (conn) {
-                                mxlnd_conn_addref(conn);
-                        } else {
-                                continue;
-                        }
-
-                       spin_lock(&conn->mxk_lock);
-
-                       /* if nothing pending (timeout == 0) or
-                        * if conn is already disconnected,
-                        * skip this conn */
-                       if (conn->mxk_timeout == 0 ||
-                           conn->mxk_status == MXLND_CONN_DISCONNECT) {
-                               spin_unlock(&conn->mxk_lock);
-                                mxlnd_conn_decref(conn);
-                                continue;
-                        }
-
-                        /* we want to find the timeout that will occur first.
-                         * if it is in the future, we will sleep until then.
-                         * if it is in the past, then we will sleep one
-                         * second and repeat the process. */
-                        if ((next == 0) ||
-                            (cfs_time_before(conn->mxk_timeout, next))) {
-                                next = conn->mxk_timeout;
-                        }
-
-                        disconnect = 0;
-
-                       if (cfs_time_aftereq(now, conn->mxk_timeout))
-                               disconnect = 1;
-                       spin_unlock(&conn->mxk_lock);
-
-                       if (disconnect)
-                               mxlnd_conn_disconnect(conn, 1, 1);
-                       mxlnd_conn_decref(conn);
-               }
-       }
-       read_unlock(g_lock);
-       if (next == 0)
-               next = now + MXLND_COMM_TIMEOUT;
-
-       return next;
-}
-
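-/* Handle a received CONN_REQ: validate the peer's queue depth and eager size
- * against our tunables, find or create the peer and connection, then reply by
- * starting an ICON_ACK connect.  Incompatible parameters are recorded in
- * mxp_incompatible; the CONN_ACK still carries our parameters and the
- * connection is closed once it has been sent.  (Summary comment added.) */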
-void
-mxlnd_passive_connect(kmx_connparams_t *cp)
-{
-        int             ret             = 0;
-        int             incompatible    = 0;
-        u64             nic_id          = 0ULL;
-        u32             ep_id           = 0;
-        u32             sid             = 0;
-        int             conn_ref        = 0;
-        kmx_msg_t       *msg            = &cp->mxr_msg;
-        kmx_peer_t      *peer           = cp->mxr_peer;
-        kmx_conn_t      *conn           = NULL;
-       rwlock_t        *g_lock         = &kmxlnd_data.kmx_global_lock;
-
-        mx_decompose_endpoint_addr2(cp->mxr_epa, &nic_id, &ep_id, &sid);
-
-        ret = mxlnd_unpack_msg(msg, cp->mxr_nob);
-        if (ret != 0) {
-                if (peer) {
-                        CNETERR("Error %d unpacking CONN_REQ from %s\n",
-                               ret, libcfs_nid2str(peer->mxp_nid));
-                } else {
-                        CNETERR("Error %d unpacking CONN_REQ from "
-                               "unknown host with nic_id 0x%llx\n", ret, nic_id);
-                }
-                goto cleanup;
-        }
-        if (kmxlnd_data.kmx_ni->ni_nid != msg->mxm_dstnid) {
-                CNETERR("Can't accept %s: bad dst nid %s\n",
-                                libcfs_nid2str(msg->mxm_srcnid),
-                                libcfs_nid2str(msg->mxm_dstnid));
-                goto cleanup;
-        }
-        if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_peercredits) {
-                CNETERR("Can't accept %s: incompatible queue depth "
-                            "%d (%d wanted)\n",
-                                libcfs_nid2str(msg->mxm_srcnid),
-                                msg->mxm_u.conn_req.mxcrm_queue_depth,
-                                *kmxlnd_tunables.kmx_peercredits);
-                incompatible = 1;
-        }
-        if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_MSG_SIZE) {
-                CNETERR("Can't accept %s: incompatible EAGER size "
-                            "%d (%d wanted)\n",
-                                libcfs_nid2str(msg->mxm_srcnid),
-                                msg->mxm_u.conn_req.mxcrm_eager_size,
-                                (int) MXLND_MSG_SIZE);
-                incompatible = 1;
-        }
-
-        if (peer == NULL) {
-                peer = mxlnd_find_peer_by_nid(msg->mxm_srcnid, 0); /* adds peer ref */
-                if (peer == NULL) {
-                        int             hash    = 0;
-                        u32             board   = 0;
-                        kmx_peer_t      *existing_peer    = NULL;
-
-                        hash = mxlnd_nid_to_hash(msg->mxm_srcnid);
-
-                        mx_nic_id_to_board_number(nic_id, &board);
-
-                        /* adds conn ref for peer and one for this function */
-                        ret = mxlnd_peer_alloc(&peer, msg->mxm_srcnid,
-                                               board, ep_id, 0ULL);
-                        if (ret != 0) {
-                                goto cleanup;
-                        }
-                        peer->mxp_conn->mxk_sid = sid;
-                        LASSERT(peer->mxp_ep_id == ep_id);
-                       write_lock(g_lock);
-                        existing_peer = mxlnd_find_peer_by_nid_locked(msg->mxm_srcnid);
-                        if (existing_peer) {
-                                mxlnd_conn_decref(peer->mxp_conn);
-                                mxlnd_peer_decref(peer);
-                                peer = existing_peer;
-                                mxlnd_conn_addref(peer->mxp_conn);
-                                conn = peer->mxp_conn;
-                       } else {
-                               cfs_list_add_tail(&peer->mxp_list,
-                                                 &kmxlnd_data.kmx_peers[hash]);
-                               atomic_inc(&kmxlnd_data.kmx_npeers);
-                       }
-                       write_unlock(g_lock);
-                } else {
-                        ret = mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref */
-                       write_lock(g_lock);
-                        mxlnd_peer_decref(peer); /* drop ref taken above */
-                       write_unlock(g_lock);
-                        if (ret != 0) {
-                                CNETERR("Cannot allocate mxp_conn\n");
-                                goto cleanup;
-                        }
-                }
-                conn_ref = 1; /* peer/conn_alloc() added ref for this function */
-                conn = peer->mxp_conn;
-        } else { /* unexpected handler found peer */
-                kmx_conn_t      *old_conn       = peer->mxp_conn;
-
-                if (sid != peer->mxp_conn->mxk_sid) {
-                        /* do not call mx_disconnect() or send a BYE */
-                        mxlnd_conn_disconnect(old_conn, 0, 0);
-
-                        /* This allocs a conn, points peer->mxp_conn to this one.
-                        * The old conn is still on the peer->mxp_conns list.
-                        * As the pending requests complete, they will call
-                        * conn_decref() which will eventually free it. */
-                        ret = mxlnd_conn_alloc(&conn, peer);
-                        if (ret != 0) {
-                                CNETERR("Cannot allocate peer->mxp_conn\n");
-                                goto cleanup;
-                        }
-                        /* conn_alloc() adds one ref for the peer and one
-                         * for this function */
-                        conn_ref = 1;
-
-                        peer->mxp_conn->mxk_sid = sid;
-                } else {
-                        /* same sid */
-                        conn = peer->mxp_conn;
-                }
-        }
-       write_lock(g_lock);
-       peer->mxp_incompatible = incompatible;
-       write_unlock(g_lock);
-       spin_lock(&conn->mxk_lock);
-       conn->mxk_incarnation = msg->mxm_srcstamp;
-       mxlnd_set_conn_status(conn, MXLND_CONN_WAIT);
-       spin_unlock(&conn->mxk_lock);
-
-        /* handle_conn_ack() will create the CONN_ACK msg */
-        mxlnd_iconnect(peer, (u8) MXLND_MSG_ICON_ACK);
-
-cleanup:
-        if (conn_ref) mxlnd_conn_decref(conn);
-
-        mxlnd_connparams_free(cp);
-        return;
-}
-
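-/* Handle a received CONN_ACK: validate the parameters, reset the credit
- * state, adopt the peer's incarnation and mark the connection ready, then
- * kick any queued sends.  On a mismatch the connection is marked failed and,
- * if the parameters were incompatible, disconnected.  (Summary added.) */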
-void
-mxlnd_check_conn_ack(kmx_connparams_t *cp)
-{
-        int             ret             = 0;
-        int             incompatible    = 0;
-        u64             nic_id          = 0ULL;
-        u32             ep_id           = 0;
-        u32             sid             = 0;
-        kmx_msg_t       *msg            = &cp->mxr_msg;
-        kmx_peer_t      *peer           = cp->mxr_peer;
-        kmx_conn_t      *conn           = cp->mxr_conn;
-
-        mx_decompose_endpoint_addr2(cp->mxr_epa, &nic_id, &ep_id, &sid);
-
-        ret = mxlnd_unpack_msg(msg, cp->mxr_nob);
-        if (ret != 0) {
-                if (peer) {
-                        CNETERR("Error %d unpacking CONN_ACK from %s\n",
-                               ret, libcfs_nid2str(peer->mxp_nid));
-                } else {
-                        CNETERR("Error %d unpacking CONN_ACK from "
-                               "unknown host with nic_id 0x%llx\n", ret, nic_id);
-                }
-                ret = -1;
-                incompatible = 1;
-                goto failed;
-        }
-        if (kmxlnd_data.kmx_ni->ni_nid != msg->mxm_dstnid) {
-                CNETERR("Can't accept CONN_ACK from %s: "
-                       "bad dst nid %s\n", libcfs_nid2str(msg->mxm_srcnid),
-                        libcfs_nid2str(msg->mxm_dstnid));
-                ret = -1;
-                goto failed;
-        }
-        if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_peercredits) {
-                CNETERR("Can't accept CONN_ACK from %s: "
-                       "incompatible queue depth %d (%d wanted)\n",
-                        libcfs_nid2str(msg->mxm_srcnid),
-                        msg->mxm_u.conn_req.mxcrm_queue_depth,
-                        *kmxlnd_tunables.kmx_peercredits);
-                incompatible = 1;
-                ret = -1;
-                goto failed;
-        }
-        if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_MSG_SIZE) {
-                CNETERR("Can't accept CONN_ACK from %s: "
-                        "incompatible EAGER size %d (%d wanted)\n",
-                        libcfs_nid2str(msg->mxm_srcnid),
-                        msg->mxm_u.conn_req.mxcrm_eager_size,
-                        (int) MXLND_MSG_SIZE);
-                incompatible = 1;
-                ret = -1;
-                goto failed;
-        }
-       write_lock(&kmxlnd_data.kmx_global_lock);
-       peer->mxp_incompatible = incompatible;
-       write_unlock(&kmxlnd_data.kmx_global_lock);
-       spin_lock(&conn->mxk_lock);
-        conn->mxk_credits = *kmxlnd_tunables.kmx_peercredits;
-        conn->mxk_outstanding = 0;
-        conn->mxk_incarnation = msg->mxm_srcstamp;
-        conn->mxk_timeout = 0;
-        if (!incompatible) {
-                CDEBUG(D_NET, "setting peer %s CONN_READY\n",
-                       libcfs_nid2str(msg->mxm_srcnid));
-                mxlnd_set_conn_status(conn, MXLND_CONN_READY);
-        }
-       spin_unlock(&conn->mxk_lock);
-
-       if (!incompatible)
-               mxlnd_check_sends(peer);
-
-failed:
-       if (ret < 0) {
-               spin_lock(&conn->mxk_lock);
-               mxlnd_set_conn_status(conn, MXLND_CONN_FAIL);
-               spin_unlock(&conn->mxk_lock);
-       }
-
-       if (incompatible) mxlnd_conn_disconnect(conn, 0, 0);
-
-       mxlnd_connparams_free(cp);
-       return;
-}
-
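-/* Fail every context on the orphan list with -ECONNABORTED: txs go back to
- * the idle pool, rxs are completed as canceled.  Returns the number of
- * contexts aborted.  (Summary comment added for clarity.) */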
-int
-mxlnd_abort_msgs(void)
-{
-       int                     count           = 0;
-       cfs_list_t              *orphans        = &kmxlnd_data.kmx_orphan_msgs;
-       spinlock_t              *g_conn_lock    = &kmxlnd_data.kmx_conn_lock;
-
-       /* abort orphans */
-       spin_lock(g_conn_lock);
-       while (!cfs_list_empty(orphans)) {
-               kmx_ctx_t       *ctx     = NULL;
-               kmx_conn_t      *conn   = NULL;
-
-               ctx = cfs_list_entry(orphans->next, kmx_ctx_t, mxc_list);
-               cfs_list_del_init(&ctx->mxc_list);
-               spin_unlock(g_conn_lock);
-
-                ctx->mxc_errno = -ECONNABORTED;
-                conn = ctx->mxc_conn;
-                CDEBUG(D_NET, "aborting %s %s %s\n",
-                       mxlnd_msgtype_to_str(ctx->mxc_msg_type),
-                       ctx->mxc_type == MXLND_REQ_TX ? "(TX) to" : "(RX) from",
-                       libcfs_nid2str(ctx->mxc_nid));
-                if (ctx->mxc_type == MXLND_REQ_TX) {
-                        mxlnd_put_idle_tx(ctx); /* do not hold any locks */
-                        if (conn) mxlnd_conn_decref(conn); /* for this tx */
-                } else {
-                        ctx->mxc_state = MXLND_CTX_CANCELED;
-                        mxlnd_handle_rx_completion(ctx);
-                }
-
-                count++;
-               spin_lock(g_conn_lock);
-       }
-       spin_unlock(g_conn_lock);
-
-       return count;
-}
-
-int
-mxlnd_free_conn_zombies(void)
-{
-       int             count           = 0;
-       cfs_list_t      *zombies        = &kmxlnd_data.kmx_conn_zombies;
-       spinlock_t      *g_conn_lock    = &kmxlnd_data.kmx_conn_lock;
-       rwlock_t        *g_lock         = &kmxlnd_data.kmx_global_lock;
-
-       /* cleanup any zombies */
-       spin_lock(g_conn_lock);
-       while (!cfs_list_empty(zombies)) {
-               kmx_conn_t      *conn   = NULL;
-
-               conn = cfs_list_entry(zombies->next, kmx_conn_t, mxk_zombie);
-               cfs_list_del_init(&conn->mxk_zombie);
-               spin_unlock(g_conn_lock);
-
-               write_lock(g_lock);
-               mxlnd_conn_free_locked(conn);
-               write_unlock(g_lock);
-
-               count++;
-               spin_lock(g_conn_lock);
-       }
-       spin_unlock(g_conn_lock);
-       CDEBUG(D_NET, "%s: freed %d zombies\n", __func__, count);
-       return count;
-}
-
-/**
- * mxlnd_connd - handles incoming connection requests
- * @arg - thread id (as a void *)
- *
- * This thread handles incoming connection requests
- */
-int
-mxlnd_connd(void *arg)
-{
-       long                    id              = (long) arg;
-
-       CDEBUG(D_NET, "connd starting\n");
-
-       while (!(atomic_read(&kmxlnd_data.kmx_shutdown))) {
-               int                ret             = 0;
-               kmx_connparams_t  *cp              = NULL;
-               spinlock_t        *g_conn_lock  = &kmxlnd_data.kmx_conn_lock;
-               cfs_list_t        *conn_reqs    = &kmxlnd_data.kmx_conn_reqs;
-
-               ret = down_interruptible(&kmxlnd_data.kmx_conn_sem);
-
-               if (atomic_read(&kmxlnd_data.kmx_shutdown))
-                       break;
-
-                if (ret != 0)
-                        continue;
-
-                ret = mxlnd_abort_msgs();
-                ret += mxlnd_free_conn_zombies();
-
-               spin_lock(g_conn_lock);
-               if (cfs_list_empty(conn_reqs)) {
-                       if (ret == 0)
-                               CNETERR("connd woke up but did not find a "
-                                       "kmx_connparams_t or zombie conn\n");
-                       spin_unlock(g_conn_lock);
-                       continue;
-               }
-               cp = cfs_list_entry(conn_reqs->next, kmx_connparams_t,
-                                   mxr_list);
-               cfs_list_del_init(&cp->mxr_list);
-               spin_unlock(g_conn_lock);
-
-                switch (MXLND_MSG_TYPE(cp->mxr_match)) {
-                case MXLND_MSG_CONN_REQ:
-                        /* We have a connection request. Handle it. */
-                        mxlnd_passive_connect(cp);
-                        break;
-                case MXLND_MSG_CONN_ACK:
-                        /* The peer is ready for messages */
-                        mxlnd_check_conn_ack(cp);
-                        break;
-                }
-        }
-
-        mxlnd_free_conn_zombies();
-
-        CDEBUG(D_NET, "connd stopping\n");
-        mxlnd_thread_stop(id);
-        return 0;
-}
-
-/**
- * mxlnd_timeoutd - enforces timeouts on messages
- * @arg - thread id (as a void *)
- *
- * This thread queries each peer for its earliest timeout. If a peer has timed out,
- * it calls mxlnd_conn_disconnect().
- *
- * After checking for timeouts, try progressing sends (call check_sends()).
- */
-int
-mxlnd_timeoutd(void *arg)
-{
-       int             i       = 0;
-       long            id      = (long) arg;
-       unsigned long   now     = 0;
-       unsigned long   next    = 0;
-       unsigned long   delay   = msecs_to_jiffies(MSEC_PER_SEC);
-       kmx_peer_t     *peer    = NULL;
-       kmx_peer_t     *temp    = NULL;
-       kmx_conn_t     *conn    = NULL;
-       rwlock_t   *g_lock  = &kmxlnd_data.kmx_global_lock;
-
-        CDEBUG(D_NET, "timeoutd starting\n");
-
-       while (!(atomic_read(&kmxlnd_data.kmx_shutdown))) {
-
-                now = jiffies;
-                /* if the next timeout has not arrived, go back to sleep */
-                if (cfs_time_after(now, next)) {
-                        next = mxlnd_check_timeouts(now);
-                }
-
-                /* try to progress peers' txs */
-               write_lock(g_lock);
-                for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                        cfs_list_t *peers = &kmxlnd_data.kmx_peers[i];
-
-                        /* NOTE we are safe against the removal of peer, but
-                         * not against the removal of temp */
-                        cfs_list_for_each_entry_safe(peer, temp, peers,
-                                                     mxp_list) {
-                               if (atomic_read(&kmxlnd_data.kmx_shutdown))
-                                        break;
-                                mxlnd_peer_addref(peer); /* add ref... */
-                                conn = peer->mxp_conn;
-                                if (conn && conn->mxk_status != MXLND_CONN_DISCONNECT) {
-                                        mxlnd_conn_addref(conn); /* take ref... */
-                                } else {
-                                        CDEBUG(D_NET, "ignoring %s\n",
-                                               libcfs_nid2str(peer->mxp_nid));
-                                        mxlnd_peer_decref(peer); /* ...to here */
-                                        continue;
-                                }
-
-                               if ((conn->mxk_status == MXLND_CONN_READY ||
-                                   conn->mxk_status == MXLND_CONN_FAIL) &&
-                                   cfs_time_after(now,
-                                                  conn->mxk_last_tx +
-                                                  msecs_to_jiffies(MSEC_PER_SEC))) {
-                                       write_unlock(g_lock);
-                                       mxlnd_check_sends(peer);
-                                       write_lock(g_lock);
-                               }
-                               mxlnd_conn_decref(conn); /* until here */
-                               mxlnd_peer_decref(peer); /* ...to here */
-                       }
-               }
-               write_unlock(g_lock);
-
-                mxlnd_sleep(delay);
-        }
-        CDEBUG(D_NET, "timeoutd stopping\n");
-        mxlnd_thread_stop(id);
-        return 0;
-}
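
The three helpers above (mxlnd_abort_msgs(), mxlnd_free_conn_zombies() and the connd loop itself) drain their shared lists the same way: take the spinlock, pop one entry, drop the lock while acting on it, then re-take the lock. A minimal stand-alone sketch of that pattern using only stock <linux/list.h> primitives follows; drain_queue(), struct work_item and handle_item() are hypothetical names, not taken from the Lustre tree:

#include <linux/list.h>
#include <linux/spinlock.h>

struct work_item {
        struct list_head wi_list;       /* chained on the shared queue */
};

/* Drain every queued item; returns how many were handled. */
static int drain_queue(struct list_head *queue, spinlock_t *lock,
                       void (*handle_item)(struct work_item *wi))
{
        int count = 0;

        spin_lock(lock);
        while (!list_empty(queue)) {
                struct work_item *wi;

                wi = list_first_entry(queue, struct work_item, wi_list);
                list_del_init(&wi->wi_list);
                spin_unlock(lock);      /* handlers may sleep or take other locks */

                handle_item(wi);
                count++;

                spin_lock(lock);
        }
        spin_unlock(lock);
        return count;
}

Dropping the lock around the per-item work is what lets the handlers block or take other locks (mxlnd_put_idle_tx() above is explicitly called with no locks held) without deadlocking against the threads that queue new entries.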
diff --git a/lnet/klnds/mxlnd/mxlnd_modparams.c b/lnet/klnds/mxlnd/mxlnd_modparams.c
deleted file mode 100644 (file)
index 5da8d89..0000000
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (C) 2006 Myricom, Inc.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/mxlnd/mxlnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- * Author: Scott Atchley <atchley at myri.com>
- */
-
-#include "mxlnd.h"
-
-static int n_waitd = MXLND_N_SCHED;
-CFS_MODULE_PARM(n_waitd, "i", int, 0444,
-                "# of completion daemons");
-
-/* this was used to allocate global rxs which are no longer used */
-static int max_peers = MXLND_MAX_PEERS;
-CFS_MODULE_PARM(max_peers, "i", int, 0444,
-                "Unused - was maximum number of peers that may connect");
-
-static int cksum = MXLND_CKSUM;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
-                "set non-zero to enable message (not data payload) checksums");
-
-static int ntx = MXLND_NTX;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
-                "# of total tx message descriptors");
-
-/* this duplicates ntx */
-static int credits = MXLND_NTX;
-CFS_MODULE_PARM(credits, "i", int, 0444,
-                "Unused - was # concurrent sends to all peers");
-
-static int peercredits = MXLND_MSG_QUEUE_DEPTH;
-CFS_MODULE_PARM(peercredits, "i", int, 0444,
-                "# concurrent sends to one peer");
-
-static int board = MXLND_MX_BOARD;
-CFS_MODULE_PARM(board, "i", int, 0444,
-                "index value of the Myrinet board (NIC)");
-
-static int ep_id = MXLND_MX_EP_ID;
-CFS_MODULE_PARM(ep_id, "i", int, 0444, "MX endpoint ID");
-
-static char *ipif_name = "myri0";
-CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
-                "IPoMX interface name");
-
-static int polling = MXLND_POLLING;
-CFS_MODULE_PARM(polling, "i", int, 0444,
-                "Use 0 to block (wait). A value > 0 will poll that many times before blocking");
-
-static char *hosts = NULL;
-CFS_MODULE_PARM(hosts, "s", charp, 0444,
-                "Unused - was IP-to-hostname resolution file");
-
-kmx_tunables_t kmxlnd_tunables = {
-        .kmx_n_waitd            = &n_waitd,
-        .kmx_max_peers          = &max_peers,
-        .kmx_cksum              = &cksum,
-        .kmx_ntx                = &ntx,
-        .kmx_credits            = &credits,
-        .kmx_peercredits        = &peercredits,
-        .kmx_board              = &board,
-        .kmx_ep_id              = &ep_id,
-        .kmx_default_ipif       = &ipif_name,
-        .kmx_polling            = &polling
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-static char ipif_basename_space[32];
-
-static struct ctl_table kmxlnd_ctl_table[] = {
-       {
-               INIT_CTL_NAME
-               .procname       = "n_waitd",
-               .data           = &n_waitd,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "max_peers",
-               .data           = &max_peers,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "cksum",
-               .data           = &cksum,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "ntx",
-               .data           = &ntx,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "credits",
-               .data           = &credits,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "peercredits",
-               .data           = &peercredits,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "board",
-               .data           = &board,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "ep_id",
-               .data           = &ep_id,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               .procname       = "ipif_name",
-               .data           = ipif_basename_space,
-               .maxlen         = sizeof(ipif_basename_space),
-               .mode           = 0444,
-               .proc_handler   = &proc_dostring
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "polling",
-               .data           = &polling,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       { 0 }
-};
-
-static struct ctl_table kmxlnd_top_ctl_table[] = {
-        {
-               INIT_CTL_NAME
-               .procname       = "mxlnd",
-               .data           = NULL,
-               .maxlen         = 0,
-               .mode           = 0555,
-               .child          = kmxlnd_ctl_table
-       },
-       { 0 }
-};
-
-void
-kmxlnd_initstrtunable(char *space, char *str, int size)
-{
-        strncpy(space, str, size);
-        space[size-1] = 0;
-}
-
-void
-kmxlnd_sysctl_init (void)
-{
-       kmxlnd_initstrtunable(ipif_basename_space, ipif_name,
-                             sizeof(ipif_basename_space));
-
-       kmxlnd_tunables.kib_sysctl =
-               register_sysctl_table(kmxlnd_top_ctl_table);
-
-       if (kmxlnd_tunables.kib_sysctl == NULL)
-               CWARN("Can't setup /proc tunables\n");
-}
-
-void
-kmxlnd_sysctl_fini (void)
-{
-       if (kmxlnd_tunables.kib_sysctl != NULL)
-               unregister_sysctl_table(kmxlnd_tunables.kib_sysctl);
-}
-
-#else
-
-void
-kmxlnd_sysctl_init (void)
-{
-}
-
-void
-kmxlnd_sysctl_fini (void)
-{
-}
-
-#endif
-
-int
-kmxlnd_tunables_init (void)
-{
-        kmxlnd_sysctl_init();
-        return 0;
-}
-
-void
-kmxlnd_tunables_fini (void)
-{
-        kmxlnd_sysctl_fini();
-}
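
The module-parameter file above declares every tunable twice: once through the libcfs CFS_MODULE_PARM wrapper and once more in a hand-rolled sysctl table guarded by CFS_SYSFS_MODULE_PARM. For comparison, the same tunables could be exposed with the stock kernel macros alone; a minimal sketch, where the default value 8 is only a stand-in for MXLND_MSG_QUEUE_DEPTH and nothing below is part of this patch:

#include <linux/module.h>
#include <linux/moduleparam.h>

static int peercredits = 8;             /* stand-in default, not the real MXLND value */
module_param(peercredits, int, 0444);   /* readable under /sys/module/<mod>/parameters */
MODULE_PARM_DESC(peercredits, "# concurrent sends to one peer");

static char *ipif_name = "myri0";
module_param(ipif_name, charp, 0444);
MODULE_PARM_DESC(ipif_name, "IPoMX interface name");

With 0444 permissions the values can only be set at module load time, which matches the 0444 mode used by most entries in the sysctl table above.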
diff --git a/lnet/klnds/qswlnd/Makefile.in b/lnet/klnds/qswlnd/Makefile.in
deleted file mode 100644 (file)
index b623e02..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := kqswlnd
-kqswlnd-objs := qswlnd.o qswlnd_cb.o qswlnd_modparams.o
-
-EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include
-
-@INCLUDE_RULES@
diff --git a/lnet/klnds/qswlnd/autoMakefile.am b/lnet/klnds/qswlnd/autoMakefile.am
deleted file mode 100644 (file)
index 9a5c168..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# GPL HEADER START
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 only,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License version 2 for more details (a copy is included
-# in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU General Public License
-# version 2 along with this program; If not, see
-# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-#
-# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-# CA 95054 USA or visit www.sun.com if you need additional information or
-# have any questions.
-#
-# GPL HEADER END
-#
-
-#
-# Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# This file is part of Lustre, http://www.lustre.org/
-# Lustre is a trademark of Sun Microsystems, Inc.
-#
-
-if MODULES
-if BUILD_QSWLND
-modulenet_DATA = kqswlnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-EXTRA_DIST = $(kqswlnd-objs:%.o=%.c) qswlnd.h
diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c
deleted file mode 100644 (file)
index fa8e8f4..0000000
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/qswlnd/qswlnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "qswlnd.h"
-
-
-lnd_t the_kqswlnd =
-{
-       .lnd_type       = QSWLND,
-       .lnd_startup    = kqswnal_startup,
-       .lnd_shutdown   = kqswnal_shutdown,
-       .lnd_ctl        = kqswnal_ctl,
-       .lnd_send       = kqswnal_send,
-        .lnd_recv       = kqswnal_recv,
-};
-
-kqswnal_data_t         kqswnal_data;
-
-int
-kqswnal_get_tx_desc (struct libcfs_ioctl_data *data)
-{
-       unsigned long      flags;
-       cfs_list_t        *tmp;
-       kqswnal_tx_t      *ktx;
-       lnet_hdr_t        *hdr;
-       int                index = data->ioc_count;
-       int                rc = -ENOENT;
-
-       spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
-
-       cfs_list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
-               if (index-- != 0)
-                       continue;
-
-               ktx = cfs_list_entry (tmp, kqswnal_tx_t, ktx_list);
-               hdr = (lnet_hdr_t *)ktx->ktx_buffer;
-
-               data->ioc_count  = le32_to_cpu(hdr->payload_length);
-               data->ioc_nid    = le64_to_cpu(hdr->dest_nid);
-               data->ioc_u64[0] = ktx->ktx_nid;
-               data->ioc_u32[0] = le32_to_cpu(hdr->type);
-               data->ioc_u32[1] = ktx->ktx_launcher;
-               data->ioc_flags  =
-                        (cfs_list_empty (&ktx->ktx_schedlist) ? 0 : 1) |
-                                        (ktx->ktx_state << 2);
-               rc = 0;
-               break;
-       }
-
-       spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-       return (rc);
-}
-
-int
-kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
-       struct libcfs_ioctl_data *data = arg;
-
-       LASSERT (ni == kqswnal_data.kqn_ni);
-
-       switch (cmd) {
-       case IOC_LIBCFS_GET_TXDESC:
-               return (kqswnal_get_tx_desc (data));
-
-       case IOC_LIBCFS_REGISTER_MYNID:
-               if (data->ioc_nid == ni->ni_nid)
-                       return 0;
-
-               LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid));
-
-               CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n",
-                      libcfs_nid2str(data->ioc_nid),
-                      libcfs_nid2str(ni->ni_nid));
-               return 0;
-
-       default:
-               return (-EINVAL);
-       }
-}
-
-void
-kqswnal_shutdown(lnet_ni_t *ni)
-{
-       unsigned long flags;
-       kqswnal_tx_t *ktx;
-       kqswnal_rx_t *krx;
-       
-       CDEBUG (D_NET, "shutdown\n");
-       LASSERT (ni->ni_data == &kqswnal_data);
-       LASSERT (ni == kqswnal_data.kqn_ni);
-
-       switch (kqswnal_data.kqn_init)
-       {
-       default:
-               LASSERT (0);
-
-       case KQN_INIT_ALL:
-       case KQN_INIT_DATA:
-               break;
-       }
-
-       /**********************************************************************/
-       /* Signal the start of shutdown... */
-       spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
-       kqswnal_data.kqn_shuttingdown = 1;
-       spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-
-       /**********************************************************************/
-       /* wait for sends that have allocated a tx desc to launch or give up */
-       while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) {
-               CDEBUG(D_NET, "waiting for %d pending sends\n",
-                      atomic_read (&kqswnal_data.kqn_pending_txs));
-               cfs_pause(cfs_time_seconds(1));
-       }
-
-       /**********************************************************************/
-       /* close elan comms */
-       /* Shut down receivers first; rx callbacks might try sending... */
-       if (kqswnal_data.kqn_eprx_small != NULL)
-               ep_free_rcvr (kqswnal_data.kqn_eprx_small);
-
-       if (kqswnal_data.kqn_eprx_large != NULL)
-               ep_free_rcvr (kqswnal_data.kqn_eprx_large);
-
-       /* NB ep_free_rcvr() returns only after we've freed off all receive
-        * buffers (see shutdown handling in kqswnal_requeue_rx()).  This
-        * means we must have completed any messages we passed to
-        * lnet_parse() */
-
-       if (kqswnal_data.kqn_eptx != NULL)
-               ep_free_xmtr (kqswnal_data.kqn_eptx);
-
-       /* NB ep_free_xmtr() returns only after all outstanding transmits
-        * have called their callback... */
-       LASSERT(cfs_list_empty(&kqswnal_data.kqn_activetxds));
-
-       /**********************************************************************/
-       /* flag threads to terminate, wake them and wait for them to die */
-       kqswnal_data.kqn_shuttingdown = 2;
-       wake_up_all (&kqswnal_data.kqn_sched_waitq);
-
-       while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
-               CDEBUG(D_NET, "waiting for %d threads to terminate\n",
-                      atomic_read (&kqswnal_data.kqn_nthreads));
-               cfs_pause(cfs_time_seconds(1));
-       }
-
-       /**********************************************************************/
-       /* No more threads.  No more portals, router or comms callbacks!
-        * I control the horizontals and the verticals...
-        */
-
-       LASSERT (cfs_list_empty (&kqswnal_data.kqn_readyrxds));
-       LASSERT (cfs_list_empty (&kqswnal_data.kqn_donetxds));
-       LASSERT (cfs_list_empty (&kqswnal_data.kqn_delayedtxds));
-
-       /**********************************************************************/
-       /* Unmap message buffers and free all descriptors and buffers
-        */
-
-       /* FTTB, we need to unmap any remaining mapped memory.  When
-        * ep_dvma_release() get fixed (and releases any mappings in the
-        * region), we can delete all the code from here -------->  */
-
-       for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx = ktx->ktx_alloclist) {
-               /* If ktx has a buffer, it got mapped; unmap now.  NB only
-                * the pre-mapped stuff is still mapped since all tx descs
-                * must be idle */
-
-               if (ktx->ktx_buffer != NULL)
-                       ep_dvma_unload(kqswnal_data.kqn_ep,
-                                      kqswnal_data.kqn_ep_tx_nmh,
-                                      &ktx->ktx_ebuffer);
-       }
-
-       for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
-               /* If krx_kiov[0].kiov_page got allocated, it got mapped.  
-                * NB subsequent pages get merged */
-
-               if (krx->krx_kiov[0].kiov_page != NULL)
-                       ep_dvma_unload(kqswnal_data.kqn_ep,
-                                      kqswnal_data.kqn_ep_rx_nmh,
-                                      &krx->krx_elanbuffer);
-       }
-       /* <----------- to here */
-
-       if (kqswnal_data.kqn_ep_rx_nmh != NULL)
-               ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);
-
-       if (kqswnal_data.kqn_ep_tx_nmh != NULL)
-               ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
-
-       while (kqswnal_data.kqn_txds != NULL) {
-               ktx = kqswnal_data.kqn_txds;
-
-               if (ktx->ktx_buffer != NULL)
-                       LIBCFS_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
-
-               kqswnal_data.kqn_txds = ktx->ktx_alloclist;
-               LIBCFS_FREE(ktx, sizeof(*ktx));
-       }
-
-       while (kqswnal_data.kqn_rxds != NULL) {
-               int           i;
-
-               krx = kqswnal_data.kqn_rxds;
-               for (i = 0; i < krx->krx_npages; i++)
-                       if (krx->krx_kiov[i].kiov_page != NULL)
-                               __free_page (krx->krx_kiov[i].kiov_page);
-
-               kqswnal_data.kqn_rxds = krx->krx_alloclist;
-               LIBCFS_FREE(krx, sizeof (*krx));
-       }
-
-       /* resets flags, pointers to NULL etc */
-       memset(&kqswnal_data, 0, sizeof (kqswnal_data));
-
-       CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&libcfs_kmemory));
-
-       module_put(THIS_MODULE);
-}
-
-int
-kqswnal_startup (lnet_ni_t *ni)
-{
-       EP_RAILMASK       all_rails = EP_RAILMASK_ALL;
-       int               rc;
-       int               i;
-       kqswnal_rx_t     *krx;
-       kqswnal_tx_t     *ktx;
-       int               elan_page_idx;
-
-       LASSERT (ni->ni_lnd == &the_kqswlnd);
-
-       /* Only 1 instance supported */
-       if (kqswnal_data.kqn_init != KQN_INIT_NOTHING) {
-                CERROR ("Only 1 instance supported\n");
-                return -EPERM;
-        }
-
-        if (ni->ni_interfaces[0] != NULL) {
-                CERROR("Explicit interface config not supported\n");
-                return -EPERM;
-        }
-
-       if (*kqswnal_tunables.kqn_credits >=
-           *kqswnal_tunables.kqn_ntxmsgs) {
-               LCONSOLE_ERROR_MSG(0x12e, "Configuration error: please set "
-                                  "ntxmsgs(%d) > credits(%d)\n",
-                                  *kqswnal_tunables.kqn_ntxmsgs,
-                                  *kqswnal_tunables.kqn_credits);
-       }
-        
-       CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&libcfs_kmemory));
-       
-       /* ensure all pointers NULL etc */
-       memset (&kqswnal_data, 0, sizeof (kqswnal_data));
-
-       kqswnal_data.kqn_ni = ni;
-       ni->ni_data = &kqswnal_data;
-       ni->ni_peertxcredits = *kqswnal_tunables.kqn_peercredits;
-       ni->ni_maxtxcredits = *kqswnal_tunables.kqn_credits;
-
-       CFS_INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
-       CFS_INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
-       spin_lock_init(&kqswnal_data.kqn_idletxd_lock);
-
-       CFS_INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
-       CFS_INIT_LIST_HEAD (&kqswnal_data.kqn_donetxds);
-       CFS_INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
-
-       spin_lock_init(&kqswnal_data.kqn_sched_lock);
-       init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
-
-       /* pointers/lists/locks initialised */
-       kqswnal_data.kqn_init = KQN_INIT_DATA;
-       try_module_get(THIS_MODULE);
-       
-       kqswnal_data.kqn_ep = ep_system();
-       if (kqswnal_data.kqn_ep == NULL) {
-               CERROR("Can't initialise EKC\n");
-               kqswnal_shutdown(ni);
-               return (-ENODEV);
-       }
-
-       if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
-               CERROR("Can't get elan ID\n");
-               kqswnal_shutdown(ni);
-               return (-ENODEV);
-       }
-
-       kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_ep);
-       kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_ep);
-
-       ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), kqswnal_data.kqn_elanid);
-       
-       /**********************************************************************/
-       /* Get the transmitter */
-
-       kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
-       if (kqswnal_data.kqn_eptx == NULL)
-       {
-               CERROR ("Can't allocate transmitter\n");
-               kqswnal_shutdown (ni);
-               return (-ENOMEM);
-       }
-
-       /**********************************************************************/
-       /* Get the receivers */
-
-       kqswnal_data.kqn_eprx_small = 
-               ep_alloc_rcvr (kqswnal_data.kqn_ep,
-                              EP_MSG_SVC_PORTALS_SMALL,
-                              *kqswnal_tunables.kqn_ep_envelopes_small);
-       if (kqswnal_data.kqn_eprx_small == NULL)
-       {
-               CERROR ("Can't install small msg receiver\n");
-               kqswnal_shutdown (ni);
-               return (-ENOMEM);
-       }
-
-       kqswnal_data.kqn_eprx_large = 
-               ep_alloc_rcvr (kqswnal_data.kqn_ep,
-                              EP_MSG_SVC_PORTALS_LARGE,
-                              *kqswnal_tunables.kqn_ep_envelopes_large);
-       if (kqswnal_data.kqn_eprx_large == NULL)
-       {
-               CERROR ("Can't install large msg receiver\n");
-               kqswnal_shutdown (ni);
-               return (-ENOMEM);
-       }
-
-       /**********************************************************************/
-       /* Reserve Elan address space for transmit descriptors NB we may
-        * either send the contents of associated buffers immediately, or
-        * map them for the peer to suck/blow... */
-       kqswnal_data.kqn_ep_tx_nmh = 
-               ep_dvma_reserve(kqswnal_data.kqn_ep,
-                               KQSW_NTXMSGPAGES*(*kqswnal_tunables.kqn_ntxmsgs),
-                               EP_PERM_WRITE);
-       if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
-               CERROR("Can't reserve tx dma space\n");
-               kqswnal_shutdown(ni);
-               return (-ENOMEM);
-       }
-
-       /**********************************************************************/
-       /* Reserve Elan address space for receive buffers */
-       kqswnal_data.kqn_ep_rx_nmh =
-               ep_dvma_reserve(kqswnal_data.kqn_ep,
-                               KQSW_NRXMSGPAGES_SMALL * 
-                               (*kqswnal_tunables.kqn_nrxmsgs_small) +
-                               KQSW_NRXMSGPAGES_LARGE * 
-                               (*kqswnal_tunables.kqn_nrxmsgs_large),
-                               EP_PERM_WRITE);
-       if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
-               CERROR("Can't reserve rx dma space\n");
-               kqswnal_shutdown(ni);
-               return (-ENOMEM);
-       }
-
-       /**********************************************************************/
-       /* Allocate/Initialise transmit descriptors */
-
-       kqswnal_data.kqn_txds = NULL;
-       for (i = 0; i < (*kqswnal_tunables.kqn_ntxmsgs); i++)
-       {
-               int           premapped_pages;
-               int           basepage = i * KQSW_NTXMSGPAGES;
-
-               LIBCFS_ALLOC (ktx, sizeof(*ktx));
-               if (ktx == NULL) {
-                       kqswnal_shutdown (ni);
-                       return (-ENOMEM);
-               }
-
-               memset(ktx, 0, sizeof(*ktx));   /* NULL pointers; zero flags */
-               ktx->ktx_alloclist = kqswnal_data.kqn_txds;
-               kqswnal_data.kqn_txds = ktx;
-
-               LIBCFS_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
-               if (ktx->ktx_buffer == NULL)
-               {
-                       kqswnal_shutdown (ni);
-                       return (-ENOMEM);
-               }
-
-               /* Map pre-allocated buffer NOW, to save latency on transmit */
-               premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
-                                                       KQSW_TX_BUFFER_SIZE);
-               ep_dvma_load(kqswnal_data.kqn_ep, NULL, 
-                            ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, 
-                            kqswnal_data.kqn_ep_tx_nmh, basepage,
-                            &all_rails, &ktx->ktx_ebuffer);
-
-               ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
-               ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
-
-               CFS_INIT_LIST_HEAD (&ktx->ktx_schedlist);
-
-               ktx->ktx_state = KTX_IDLE;
-               ktx->ktx_rail = -1;             /* unset rail */
-
-               cfs_list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
-       }
-
-       /**********************************************************************/
-       /* Allocate/Initialise receive descriptors */
-       kqswnal_data.kqn_rxds = NULL;
-       elan_page_idx = 0;
-       for (i = 0; i < *kqswnal_tunables.kqn_nrxmsgs_small + *kqswnal_tunables.kqn_nrxmsgs_large; i++)
-       {
-               EP_NMD        elanbuffer;
-               int           j;
-
-               LIBCFS_ALLOC(krx, sizeof(*krx));
-               if (krx == NULL) {
-                       kqswnal_shutdown(ni);
-                       return (-ENOMEM);
-               }
-
-               memset(krx, 0, sizeof(*krx)); /* clear flags, null pointers etc */
-               krx->krx_alloclist = kqswnal_data.kqn_rxds;
-               kqswnal_data.kqn_rxds = krx;
-
-               if (i < *kqswnal_tunables.kqn_nrxmsgs_small)
-               {
-                       krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
-                       krx->krx_eprx   = kqswnal_data.kqn_eprx_small;
-               }
-               else
-               {
-                       krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
-                       krx->krx_eprx   = kqswnal_data.kqn_eprx_large;
-               }
-
-               LASSERT (krx->krx_npages > 0);
-               for (j = 0; j < krx->krx_npages; j++)
-               {
-                       struct page *page = alloc_page(GFP_KERNEL);
-                       
-                       if (page == NULL) {
-                               kqswnal_shutdown (ni);
-                               return (-ENOMEM);
-                       }
-
-                       krx->krx_kiov[j] = (lnet_kiov_t) {.kiov_page = page,
-                                                         .kiov_offset = 0,
-                                                         .kiov_len = PAGE_SIZE};
-                       LASSERT(page_address(page) != NULL);
-
-                       ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                                    page_address(page),
-                                    PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
-                                    elan_page_idx, &all_rails, &elanbuffer);
-                       
-                       if (j == 0) {
-                               krx->krx_elanbuffer = elanbuffer;
-                       } else {
-                               rc = ep_nmd_merge(&krx->krx_elanbuffer,
-                                                 &krx->krx_elanbuffer, 
-                                                 &elanbuffer);
-                               /* NB contiguous mapping */
-                               LASSERT(rc);
-                       }
-                       elan_page_idx++;
-
-               }
-       }
-       LASSERT (elan_page_idx ==
-                (*kqswnal_tunables.kqn_nrxmsgs_small * KQSW_NRXMSGPAGES_SMALL) +
-                (*kqswnal_tunables.kqn_nrxmsgs_large * KQSW_NRXMSGPAGES_LARGE));
-
-       /**********************************************************************/
-       /* Queue receives, now that it's OK to run their completion callbacks */
-
-       for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
-               /* NB this enqueue can allocate/sleep (attr == 0) */
-               krx->krx_state = KRX_POSTED;
-               rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-                                     &krx->krx_elanbuffer, 0);
-               if (rc != EP_SUCCESS) {
-                       CERROR ("failed ep_queue_receive %d\n", rc);
-                       kqswnal_shutdown (ni);
-                       return (-EIO);
-               }
-       }
-
-       /**********************************************************************/
-       /* Spawn scheduling threads */
-       for (i = 0; i < num_online_cpus(); i++) {
-               rc = kqswnal_thread_start(kqswnal_scheduler, NULL,
-                                         "kqswnal_sched");
-               if (rc != 0)
-               {
-                       CERROR ("failed to spawn scheduling thread: %d\n", rc);
-                       kqswnal_shutdown (ni);
-                       return (-ESRCH);
-               }
-       }
-
-       kqswnal_data.kqn_init = KQN_INIT_ALL;
-       return (0);
-}
-
-void __exit
-kqswnal_finalise (void)
-{
-       lnet_unregister_lnd(&the_kqswlnd);
-       kqswnal_tunables_fini();
-}
-
-static int __init
-kqswnal_initialise (void)
-{
-       int   rc = kqswnal_tunables_init();
-       
-       if (rc != 0)
-               return rc;
-
-       lnet_register_lnd(&the_kqswlnd);
-       return (0);
-}
-
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Kernel Quadrics/Elan LND v1.01");
-MODULE_LICENSE("GPL");
-
-module_init (kqswnal_initialise);
-module_exit (kqswnal_finalise);
diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h
deleted file mode 100644 (file)
index cea5d40..0000000
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/qswlnd/qswlnd.h
- *
- * Basic library routines.
- */
-
-#ifndef _QSWNAL_H
-#define _QSWNAL_H
-
-#include <qsnet/kernel.h>
-#undef printf                                   /* nasty QSW #define */
-#include <linux/module.h>
-
-#include <elan/epcomms.h>
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/buffer_head.h>  /* wait_on_buffer */
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/sysctl.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-/* fixed constants */
-#define KQSW_SMALLMSG                  (4<<10)  /* small/large ep receiver breakpoint */
-#define KQSW_RESCHED                    100     /* # busy loops that forces scheduler to yield */
-
-#define KQSW_CKSUM                      0       /* enable checksumming (protocol incompatible) */
-
-/*
- * derived constants
- */
-
-#define KQSW_TX_BUFFER_SIZE     (offsetof(kqswnal_msg_t, \
-                                          kqm_u.immediate.kqim_payload[*kqswnal_tunables.kqn_tx_maxcontig]))
-/* The pre-allocated tx buffer (hdr + small payload) */
-
-#define KQSW_NTXMSGPAGES        (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(LNET_MAX_PAYLOAD) + 1)
-/* Reserve elan address space for pre-allocated and pre-mapped transmit
- * buffer and a full payload too.  Extra pages allow for page alignment */
-
-#define KQSW_NRXMSGPAGES_SMALL  (btopr(KQSW_SMALLMSG))
-/* receive hdr/payload always contiguous and page aligned */
-#define KQSW_NRXMSGBYTES_SMALL  (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
-
-#define KQSW_NRXMSGPAGES_LARGE  (btopr(sizeof(lnet_msg_t) + LNET_MAX_PAYLOAD))
-/* receive hdr/payload always contiguous and page aligned */
-#define KQSW_NRXMSGBYTES_LARGE  (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
-/* biggest complete packet we can receive (or transmit) */
-
-/* Wire messages */
-/* Remote memory descriptor */
-typedef struct
-{
-        __u32            kqrmd_nfrag;           /* # frags */
-        EP_NMD           kqrmd_frag[0];         /* actual frags */
-} kqswnal_remotemd_t;
-
-/* Immediate data */
-typedef struct
-{
-        lnet_hdr_t       kqim_hdr;              /* LNET header */
-        char             kqim_payload[0];       /* piggy-backed payload */
-} WIRE_ATTR kqswnal_immediate_msg_t;
-
-/* RDMA request */
-typedef struct
-{
-        lnet_hdr_t          kqrm_hdr;           /* LNET header */
-        kqswnal_remotemd_t  kqrm_rmd;           /* peer's buffer */
-} WIRE_ATTR kqswnal_rdma_msg_t;
-
-typedef struct
-{
-        __u32            kqm_magic;             /* I'm a qswlnd message */
-        __u16            kqm_version;           /* this is my version number */
-        __u16            kqm_type;              /* msg type */
-#if KQSW_CKSUM
-        __u32            kqm_cksum;             /* crc32 checksum */
-        __u32            kqm_nob;               /* original msg length */
-#endif
-        union {
-                kqswnal_immediate_msg_t  immediate;
-                kqswnal_rdma_msg_t       rdma;
-        } WIRE_ATTR kqm_u;
-} WIRE_ATTR kqswnal_msg_t;
-
-#if KQSW_CKSUM                                           /* enable checksums ? */
-# include <linux/crc32.h>
-static inline __u32 kqswnal_csum(__u32 crc, unsigned char const *p, size_t len)
-{
-#if 1
-        return crc32_le(crc, p, len);
-#else
-        while (len-- > 0)
-                crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ;
-        return crc;
-#endif
-}
-# define QSWLND_PROTO_VERSION         0xbeef
-#else
-# define QSWLND_PROTO_VERSION         1
-#endif
-
-#define QSWLND_MSG_IMMEDIATE          0
-#define QSWLND_MSG_RDMA               1
-
-typedef union {
-        EP_STATUSBLK     ep_statusblk;
-        struct {
-                __u32       status;
-                __u32       magic;
-                __u32       version;
-                union {
-                        struct {
-                                __u32    len;
-                                __u32    cksum;
-                        } WIRE_ATTR get;
-                } WIRE_ATTR u;
-        } WIRE_ATTR     msg;
-} kqswnal_rpc_reply_t;
-
-typedef struct kqswnal_rx
-{
-        cfs_list_t           krx_list;     /* enqueue -> thread */
-        struct kqswnal_rx   *krx_alloclist;/* stack in kqn_rxds */
-        EP_RCVR             *krx_eprx;     /* port to post receives to */
-        EP_RXD              *krx_rxd;      /* receive descriptor (for repost) */
-        EP_NMD               krx_elanbuffer;/* contiguous Elan buffer */
-        int                  krx_npages;    /* # pages in receive buffer */
-        int                  krx_nob;       /* Number Of Bytes received into buffer */
-        int                  krx_rpc_reply_needed:1; /* peer waiting for EKC RPC reply */
-        int                  krx_state;     /* what this RX is doing */
-       atomic_t         krx_refcount;  /* how to tell when rpc is done */
-#if KQSW_CKSUM
-        __u32                krx_cksum;     /* checksum */
-#endif
-        kqswnal_rpc_reply_t  krx_rpc_reply; /* rpc reply status block */
-        lnet_kiov_t          krx_kiov[KQSW_NRXMSGPAGES_LARGE];/* buffer frags */
-}  kqswnal_rx_t;
-
-#define KRX_POSTED       1                      /* receiving */
-#define KRX_PARSE        2                      /* ready to be parsed */
-#define KRX_COMPLETING   3                      /* waiting to be completed */
-
-
-typedef struct kqswnal_tx
-{
-        cfs_list_t            ktx_list;         /* enqueue idle/active */
-        cfs_list_t            ktx_schedlist;    /* enqueue on scheduler */
-        struct kqswnal_tx    *ktx_alloclist;    /* stack in kqn_txds */
-        unsigned int          ktx_state:7;      /* What I'm doing */
-        unsigned int          ktx_firsttmpfrag:1;  /* ktx_frags[0] is in my ebuffer ? 0 : 1 */
-        __u32                 ktx_basepage;     /* page offset in reserved elan tx vaddrs for mapping pages */
-        int                   ktx_npages;       /* pages reserved for mapping messages */
-        int                   ktx_nmappedpages; /* # pages mapped for current message */
-        int                   ktx_port;         /* destination ep port */
-        lnet_nid_t            ktx_nid;          /* destination node */
-        void                 *ktx_args[3];      /* completion passthru */
-        char                 *ktx_buffer;       /* pre-allocated contiguous buffer for hdr + small payloads */
-        cfs_time_t            ktx_launchtime;   /* when (in jiffies) the
-                                                 * transmit was launched */
-        int                   ktx_status;       /* completion status */
-#if KQSW_CKSUM
-        __u32                 ktx_cksum;        /* optimized GET payload checksum */
-#endif
-        /* debug/info fields */
-        pid_t                 ktx_launcher;     /* pid of launching process */
-
-        int                   ktx_nfrag;        /* # message frags */
-        int                   ktx_rail;         /* preferred rail */
-        EP_NMD                ktx_ebuffer;      /* elan mapping of ktx_buffer */
-        EP_NMD                ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */
-} kqswnal_tx_t;
-
-#define KTX_IDLE        0                       /* on kqn_idletxds */
-#define KTX_SENDING     1                       /* normal send */
-#define KTX_GETTING     2                       /* sending optimised get */
-#define KTX_PUTTING     3                       /* sending optimised put */
-#define KTX_RDMA_FETCH  4                       /* handling optimised put */
-#define KTX_RDMA_STORE  5                       /* handling optimised get */
-
-typedef struct
-{
-        int               *kqn_tx_maxcontig;    /* maximum payload to defrag */
-        int               *kqn_ntxmsgs;         /* # normal tx msgs */
-        int               *kqn_credits;         /* # concurrent sends */
-        int               *kqn_peercredits;     /* # concurrent sends to 1 peer */
-        int               *kqn_nrxmsgs_large;   /* # 'large' rx msgs */
-        int               *kqn_ep_envelopes_large; /* # 'large' rx ep envelopes */
-        int               *kqn_nrxmsgs_small;   /* # 'small' rx msgs */
-        int               *kqn_ep_envelopes_small; /* # 'small' rx ep envelopes */
-        int               *kqn_optimized_puts;  /* optimized PUTs? */
-        int               *kqn_optimized_gets;  /* optimized GETs? */
-#if KQSW_CKSUM
-        int               *kqn_inject_csum_error; /* # csum errors to inject */
-#endif
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-       struct ctl_table_header *kqn_sysctl;  /* sysctl interface */
-#endif
-} kqswnal_tunables_t;
-
-typedef struct
-{
-       char                 kqn_init;        /* what's been initialised */
-       char                 kqn_shuttingdown;/* I'm trying to shut down */
-       atomic_t        kqn_nthreads;    /* # threads running */
-       lnet_ni_t           *kqn_ni;          /* _the_ instance of me */
-
-       kqswnal_rx_t        *kqn_rxds;        /* stack of all the receive descriptors */
-       kqswnal_tx_t        *kqn_txds;        /* stack of all the transmit descriptors */
-
-       cfs_list_t           kqn_idletxds;    /* transmit descriptors free to use */
-       cfs_list_t           kqn_activetxds;  /* transmit descriptors being used */
-       spinlock_t      kqn_idletxd_lock;    /* serialise idle txd access */
-       atomic_t        kqn_pending_txs;     /* # transmits being prepped */
-
-       spinlock_t      kqn_sched_lock;      /* serialise packet schedulers */
-       wait_queue_head_t    kqn_sched_waitq;/* scheduler blocks here */
-
-       cfs_list_t           kqn_readyrxds;  /* rxds full of data */
-       cfs_list_t           kqn_donetxds;   /* completed transmits */
-       cfs_list_t           kqn_delayedtxds;/* delayed transmits */
-
-       EP_SYS              *kqn_ep;         /* elan system */
-       EP_NMH              *kqn_ep_tx_nmh;  /* elan reserved tx vaddrs */
-       EP_NMH              *kqn_ep_rx_nmh;  /* elan reserved rx vaddrs */
-       EP_XMTR             *kqn_eptx;       /* elan transmitter */
-       EP_RCVR             *kqn_eprx_small; /* elan receiver (small messages) */
-       EP_RCVR             *kqn_eprx_large; /* elan receiver (large messages) */
-
-       int                  kqn_nnodes;     /* this cluster's size */
-       int                  kqn_elanid;     /* this nodes's elan ID */
-
-       EP_STATUSBLK         kqn_rpc_success;/* preset RPC reply status blocks */
-       EP_STATUSBLK         kqn_rpc_failed;
-       EP_STATUSBLK         kqn_rpc_version;/* reply to future version query */
-       EP_STATUSBLK         kqn_rpc_magic;  /* reply to future version query */
-} kqswnal_data_t;
-
-/* kqn_init state */
-#define KQN_INIT_NOTHING        0               /* MUST BE ZERO so zeroed state is initialised OK */
-#define KQN_INIT_DATA           1
-#define KQN_INIT_ALL            2
-
-extern kqswnal_tunables_t  kqswnal_tunables;
-extern kqswnal_data_t      kqswnal_data;
-
-extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
-extern void kqswnal_rxhandler(EP_RXD *rxd);
-extern int kqswnal_scheduler (void *);
-extern void kqswnal_rx_done (kqswnal_rx_t *krx);
-
-static inline lnet_nid_t
-kqswnal_elanid2nid (int elanid)
-{
-        return LNET_MKNID(LNET_NIDNET(kqswnal_data.kqn_ni->ni_nid), elanid);
-}
-
-static inline int
-kqswnal_nid2elanid (lnet_nid_t nid)
-{
-        __u32 elanid = LNET_NIDADDR(nid);
-
-        /* not in this cluster? */
-        return (elanid >= kqswnal_data.kqn_nnodes) ? -1 : elanid;
-}
-
-static inline lnet_nid_t
-kqswnal_rx_nid(kqswnal_rx_t *krx)
-{
-        return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)));
-}
-
-static inline int
-kqswnal_pages_spanned (void *base, int nob)
-{
-        unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT;
-        unsigned long last_page  = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT;
-
-        LASSERT (last_page >= first_page);      /* can't wrap address space */
-        return (last_page - first_page + 1);
-}
-
-static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
-{
-       LASSERT (atomic_read (&krx->krx_refcount) > 0);
-       if (atomic_dec_and_test (&krx->krx_refcount))
-                kqswnal_rx_done(krx);
-}
-
-int kqswnal_startup (lnet_ni_t *ni);
-void kqswnal_shutdown (lnet_ni_t *ni);
-int kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kqswnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, 
-                 int delayed, unsigned int niov, 
-                 struct iovec *iov, lnet_kiov_t *kiov,
-                 unsigned int offset, unsigned int mlen, unsigned int rlen);
-
-int kqswnal_tunables_init(void);
-void kqswnal_tunables_fini(void);
-
-#endif /* _QSWNAL_H */
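
The derived receive-buffer constants near the top of this header are easier to check with concrete numbers. A worked example, assuming 4 KiB pages, the common 1 MiB LNET_MAX_PAYLOAD, sizeof(lnet_msg_t) no larger than one page, and btopr() rounding bytes up to whole pages (all of these are assumptions, not values taken from this patch):

    KQSW_SMALLMSG          = 4 << 10                           = 4096 bytes
    KQSW_NRXMSGPAGES_SMALL = btopr(4096)                       = 1 page
    KQSW_NRXMSGBYTES_SMALL = 1 * 4096                          = 4 KiB

    KQSW_NRXMSGPAGES_LARGE = btopr(sizeof(lnet_msg_t) + 1 MiB) = 256 + 1 = 257 pages
    KQSW_NRXMSGBYTES_LARGE = 257 * 4096                        = 1052672 bytes (about 1 MiB + 4 KiB)

Under those assumptions each "large" receive buffer spans 257 contiguously mapped Elan pages, which is consistent with kqswnal_startup() in qswlnd.c merging the per-page DMA mappings into one contiguous Elan mapping with ep_nmd_merge() as it loads them.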
diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c
deleted file mode 100644 (file)
index 99eb1cc..0000000
+++ /dev/null
@@ -1,1778 +0,0 @@
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Copyright (c) 2012, Intel Corporation.
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "qswlnd.h"
-
-void
-kqswnal_notify_peer_down(kqswnal_tx_t *ktx)
-{
-        time_t             then;
-
-        then = cfs_time_current_sec() -
-                cfs_duration_sec(cfs_time_current() -
-                                 ktx->ktx_launchtime);
-
-        lnet_notify(kqswnal_data.kqn_ni, ktx->ktx_nid, 0, then);
-}
-
-void
-kqswnal_unmap_tx (kqswnal_tx_t *ktx)
-{
-        int      i;
-
-        ktx->ktx_rail = -1;                     /* unset rail */
-
-        if (ktx->ktx_nmappedpages == 0)
-                return;
-        
-        CDEBUG(D_NET, "%p unloading %d frags starting at %d\n",
-               ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag);
-
-        for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++)
-                ep_dvma_unload(kqswnal_data.kqn_ep,
-                               kqswnal_data.kqn_ep_tx_nmh,
-                               &ktx->ktx_frags[i]);
-
-        ktx->ktx_nmappedpages = 0;
-}
-
-int
-kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, 
-                     unsigned int niov, lnet_kiov_t *kiov)
-{
-        int       nfrags    = ktx->ktx_nfrag;
-        int       nmapped   = ktx->ktx_nmappedpages;
-        int       maxmapped = ktx->ktx_npages;
-        __u32     basepage  = ktx->ktx_basepage + nmapped;
-        char     *ptr;
-
-        EP_RAILMASK railmask;
-        int         rail;
-
-        if (ktx->ktx_rail < 0)
-                ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
-                                                 EP_RAILMASK_ALL,
-                                                 kqswnal_nid2elanid(ktx->ktx_nid));
-        rail = ktx->ktx_rail;
-        if (rail < 0) {
-                CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid));
-                return (-ENETDOWN);
-        }
-        railmask = 1 << rail;
-
-        LASSERT (nmapped <= maxmapped);
-        LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
-        LASSERT (nfrags <= EP_MAXFRAG);
-        LASSERT (niov > 0);
-        LASSERT (nob > 0);
-
-        /* skip complete frags before 'offset' */
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                kiov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-
-        do {
-                int  fraglen = kiov->kiov_len - offset;
-
-                /* each page frag is contained in one page */
-                LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
-
-                if (fraglen > nob)
-                        fraglen = nob;
-
-                nmapped++;
-                if (nmapped > maxmapped) {
-                        CERROR("Can't map message in %d pages (max %d)\n",
-                               nmapped, maxmapped);
-                        return (-EMSGSIZE);
-                }
-
-                if (nfrags == EP_MAXFRAG) {
-                        CERROR("Message too fragmented in Elan VM (max %d frags)\n",
-                               EP_MAXFRAG);
-                        return (-EMSGSIZE);
-                }
-
-                /* XXX this is really crap, but we'll have to kmap until
-                 * EKC has a page (rather than vaddr) mapping interface */
-
-                ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-
-                CDEBUG(D_NET,
-                       "%p[%d] loading %p for %d, page %d, %d total\n",
-                        ktx, nfrags, ptr, fraglen, basepage, nmapped);
-
-                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                             ptr, fraglen,
-                             kqswnal_data.kqn_ep_tx_nmh, basepage,
-                             &railmask, &ktx->ktx_frags[nfrags]);
-
-                if (nfrags == ktx->ktx_firsttmpfrag ||
-                    !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags])) {
-                        /* new frag if this is the first or can't merge */
-                        nfrags++;
-                }
-
-                kunmap (kiov->kiov_page);
-                
-                /* keep in loop for failure case */
-                ktx->ktx_nmappedpages = nmapped;
-
-                basepage++;
-                kiov++;
-                niov--;
-                nob -= fraglen;
-                offset = 0;
-
-                /* iov must not run out before end of data */
-                LASSERT (nob == 0 || niov > 0);
-
-        } while (nob > 0);
-
-        ktx->ktx_nfrag = nfrags;
-        CDEBUG (D_NET, "%p got %d frags over %d pages\n",
-                ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
-
-        return (0);
-}
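The mapping routines above (and the checksum helpers that follow) share one scatter/gather walk: skip whole fragments that fall before 'offset', then consume up to 'nob' bytes fragment by fragment. A minimal userspace sketch of that walk over a plain struct iovec array, with a memcpy standing in for the ep_dvma_load()/ep_nmd_merge() work; the function name and the copy destination are illustrative only.

#include <assert.h>
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

/* Walk 'nob' bytes of (iov, niov) starting at byte 'offset', copying into
 * 'dst'.  Same skip-then-consume structure as kqswnal_map_tx_iov(), but the
 * per-fragment action here is just a memcpy. */
static size_t
walk_frags(char *dst, size_t offset, size_t nob,
           unsigned int niov, const struct iovec *iov)
{
        size_t copied = 0;

        if (nob == 0)
                return 0;

        /* skip complete frags before 'offset' */
        while (offset >= iov->iov_len) {
                offset -= iov->iov_len;
                iov++;
                niov--;
                assert(niov > 0);
        }

        do {
                size_t fraglen = iov->iov_len - offset;

                if (fraglen > nob)
                        fraglen = nob;

                memcpy(dst + copied, (char *)iov->iov_base + offset, fraglen);
                copied += fraglen;

                iov++;
                niov--;
                nob -= fraglen;
                offset = 0;

                /* iov must not run out before end of data */
                assert(nob == 0 || niov > 0);
        } while (nob > 0);

        return copied;
}

In the kernel code the per-fragment action is the Elan DVMA load plus an attempt to merge the new NMD with the previous one; the loop skeleton is otherwise identical.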
-
-#if KQSW_CKSUM
-__u32
-kqswnal_csum_kiov (__u32 csum, int offset, int nob, 
-                   unsigned int niov, lnet_kiov_t *kiov)
-{
-        char     *ptr;
-
-        if (nob == 0)
-                return csum;
-
-        LASSERT (niov > 0);
-        LASSERT (nob > 0);
-
-        /* skip complete frags before 'offset' */
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                kiov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-
-        do {
-                int  fraglen = kiov->kiov_len - offset;
-
-                /* each page frag is contained in one page */
-                LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
-
-                if (fraglen > nob)
-                        fraglen = nob;
-
-                ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-
-                csum = kqswnal_csum(csum, ptr, fraglen);
-
-                kunmap (kiov->kiov_page);
-                
-                kiov++;
-                niov--;
-                nob -= fraglen;
-                offset = 0;
-
-                /* iov must not run out before end of data */
-                LASSERT (nob == 0 || niov > 0);
-
-        } while (nob > 0);
-
-        return csum;
-}
-#endif
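kqswnal_csum() itself is defined elsewhere in the driver, so its algorithm is not visible in this hunk; the sketch below only shows how a running 32-bit checksum (seeded with ~0, as the callers do) is threaded through scattered fragments. The byte sum used here is a stand-in, not the driver's actual checksum.

#include <stddef.h>
#include <stdint.h>

/* Stand-in for kqswnal_csum(): fold 'len' bytes into 'csum'. */
static uint32_t
csum_bytes(uint32_t csum, const void *buf, size_t len)
{
        const unsigned char *p = buf;

        while (len-- > 0)
                csum += *p++;
        return csum;
}

/* Checksum a scattered payload fragment by fragment, the way
 * kqswnal_csum_kiov()/kqswnal_csum_iov() chain their partial results. */
static uint32_t
csum_frags(unsigned int nfrag, void *const frag[], const size_t fraglen[])
{
        uint32_t     csum = ~0u;        /* same seed the callers use */
        unsigned int i;

        for (i = 0; i < nfrag; i++)
                csum = csum_bytes(csum, frag[i], fraglen[i]);
        return csum;
}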
-
-int
-kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob, 
-                    unsigned int niov, struct iovec *iov)
-{
-        int       nfrags    = ktx->ktx_nfrag;
-        int       nmapped   = ktx->ktx_nmappedpages;
-        int       maxmapped = ktx->ktx_npages;
-        __u32     basepage  = ktx->ktx_basepage + nmapped;
-
-        EP_RAILMASK railmask;
-        int         rail;
-        
-        if (ktx->ktx_rail < 0)
-                ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
-                                                 EP_RAILMASK_ALL,
-                                                 kqswnal_nid2elanid(ktx->ktx_nid));
-        rail = ktx->ktx_rail;
-        if (rail < 0) {
-                CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid));
-                return (-ENETDOWN);
-        }
-        railmask = 1 << rail;
-
-        LASSERT (nmapped <= maxmapped);
-        LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
-        LASSERT (nfrags <= EP_MAXFRAG);
-        LASSERT (niov > 0);
-        LASSERT (nob > 0);
-
-        /* skip complete frags before offset */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                iov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-        
-        do {
-                int  fraglen = iov->iov_len - offset;
-                long npages;
-                
-                if (fraglen > nob)
-                        fraglen = nob;
-                npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
-                nmapped += npages;
-                if (nmapped > maxmapped) {
-                        CERROR("Can't map message in %d pages (max %d)\n",
-                               nmapped, maxmapped);
-                        return (-EMSGSIZE);
-                }
-
-                if (nfrags == EP_MAXFRAG) {
-                        CERROR("Message too fragmented in Elan VM (max %d frags)\n",
-                               EP_MAXFRAG);
-                        return (-EMSGSIZE);
-                }
-
-                CDEBUG(D_NET,
-                       "%p[%d] loading %p for %d, pages %d for %ld, %d total\n",
-                       ktx, nfrags, iov->iov_base + offset, fraglen, 
-                       basepage, npages, nmapped);
-
-                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                             iov->iov_base + offset, fraglen,
-                             kqswnal_data.kqn_ep_tx_nmh, basepage,
-                             &railmask, &ktx->ktx_frags[nfrags]);
-
-                if (nfrags == ktx->ktx_firsttmpfrag ||
-                    !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags])) {
-                        /* new frag if this is the first or can't merge */
-                        nfrags++;
-                }
-
-                /* keep in loop for failure case */
-                ktx->ktx_nmappedpages = nmapped;
-
-                basepage += npages;
-                iov++;
-                niov--;
-                nob -= fraglen;
-                offset = 0;
-
-                /* iov must not run out before end of data */
-                LASSERT (nob == 0 || niov > 0);
-
-        } while (nob > 0);
-
-        ktx->ktx_nfrag = nfrags;
-        CDEBUG (D_NET, "%p got %d frags over %d pages\n",
-                ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
-
-        return (0);
-}
-
-#if KQSW_CKSUM
-__u32
-kqswnal_csum_iov (__u32 csum, int offset, int nob, 
-                  unsigned int niov, struct iovec *iov)
-{
-        if (nob == 0)
-                return csum;
-        
-        LASSERT (niov > 0);
-        LASSERT (nob > 0);
-
-        /* skip complete frags before offset */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                iov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-        
-        do {
-                int  fraglen = iov->iov_len - offset;
-                
-                if (fraglen > nob)
-                        fraglen = nob;
-
-                csum = kqswnal_csum(csum, iov->iov_base + offset, fraglen);
-
-                iov++;
-                niov--;
-                nob -= fraglen;
-                offset = 0;
-
-                /* iov must not run out before end of data */
-                LASSERT (nob == 0 || niov > 0);
-
-        } while (nob > 0);
-
-        return csum;
-}
-#endif
-
-void
-kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
-{
-       unsigned long     flags;
-
-       kqswnal_unmap_tx(ktx);                  /* release temporary mappings */
-       ktx->ktx_state = KTX_IDLE;
-
-       spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
-
-       cfs_list_del(&ktx->ktx_list);           /* take off active list */
-       cfs_list_add(&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
-
-       spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-}
-
-kqswnal_tx_t *
-kqswnal_get_idle_tx (void)
-{
-       unsigned long  flags;
-       kqswnal_tx_t  *ktx;
-
-       spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
-
-       if (kqswnal_data.kqn_shuttingdown ||
-           cfs_list_empty(&kqswnal_data.kqn_idletxds)) {
-               spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-
-               return NULL;
-       }
-
-        ktx = cfs_list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t,
-                              ktx_list);
-        cfs_list_del (&ktx->ktx_list);
-
-        cfs_list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds);
-        ktx->ktx_launcher = current->pid;
-       atomic_inc(&kqswnal_data.kqn_pending_txs);
-
-       spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-
-        /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
-        LASSERT (ktx->ktx_nmappedpages == 0);
-        return (ktx);
-}
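The idle/active descriptor lists above are a pre-allocated pool: take a descriptor under a lock, move it to the active list, give it back when the send completes. A compact sketch of the same discipline, with a pthread mutex standing in for the kqn_idletxd_lock spinlock and a singly linked list instead of cfs_list, so it drops the FIFO ordering and the pending-tx counter of the real code.

#include <pthread.h>
#include <stddef.h>

struct tx_desc {
        struct tx_desc *next;
        int             in_use;
};

static pthread_mutex_t  pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct tx_desc  *idle_list;

/* Analogue of kqswnal_get_idle_tx(): NULL when the pool is exhausted
 * (or, in the real driver, when the module is shutting down). */
static struct tx_desc *
get_idle_tx(void)
{
        struct tx_desc *tx;

        pthread_mutex_lock(&pool_lock);
        tx = idle_list;
        if (tx != NULL) {
                idle_list = tx->next;           /* take off the idle list */
                tx->in_use = 1;
        }
        pthread_mutex_unlock(&pool_lock);
        return tx;
}

/* Analogue of kqswnal_put_idle_tx(): give the descriptor back. */
static void
put_idle_tx(struct tx_desc *tx)
{
        pthread_mutex_lock(&pool_lock);
        tx->in_use = 0;
        tx->next = idle_list;
        idle_list = tx;
        pthread_mutex_unlock(&pool_lock);
}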
-
-void
-kqswnal_tx_done_in_thread_context (kqswnal_tx_t *ktx)
-{
-       lnet_msg_t    *lnetmsg0 = NULL;
-       lnet_msg_t    *lnetmsg1 = NULL;
-       int            status0  = 0;
-       int            status1  = 0;
-       kqswnal_rx_t  *krx;
-
-       LASSERT (!in_interrupt());
-
-       if (ktx->ktx_status == -EHOSTDOWN)
-               kqswnal_notify_peer_down(ktx);
-
-        switch (ktx->ktx_state) {
-        case KTX_RDMA_FETCH:                    /* optimized PUT/REPLY handled */
-                krx      = (kqswnal_rx_t *)ktx->ktx_args[0];
-                lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
-                status0  = ktx->ktx_status;
-#if KQSW_CKSUM
-                if (status0 == 0) {             /* RDMA succeeded */
-                        kqswnal_msg_t *msg;
-                        __u32          csum;
-
-                        msg = (kqswnal_msg_t *)
-                              page_address(krx->krx_kiov[0].kiov_page);
-
-                        csum = (lnetmsg0->msg_kiov != NULL) ?
-                               kqswnal_csum_kiov(krx->krx_cksum,
-                                                 lnetmsg0->msg_offset,
-                                                 lnetmsg0->msg_wanted,
-                                                 lnetmsg0->msg_niov,
-                                                 lnetmsg0->msg_kiov) :
-                               kqswnal_csum_iov(krx->krx_cksum,
-                                                lnetmsg0->msg_offset,
-                                                lnetmsg0->msg_wanted,
-                                                lnetmsg0->msg_niov,
-                                                lnetmsg0->msg_iov);
-
-                        /* Can only check csum if I got it all */
-                        if (lnetmsg0->msg_wanted == lnetmsg0->msg_len &&
-                            csum != msg->kqm_cksum) {
-                                ktx->ktx_status = -EIO;
-                                krx->krx_rpc_reply.msg.status = -EIO;
-                                CERROR("RDMA checksum failed %u(%u) from %s\n",
-                                       csum, msg->kqm_cksum,
-                                       libcfs_nid2str(kqswnal_rx_nid(krx)));
-                        }
-                }
-#endif       
-                LASSERT (krx->krx_state == KRX_COMPLETING);
-                kqswnal_rx_decref (krx);
-                break;
-
-        case KTX_RDMA_STORE:       /* optimized GET handled */
-        case KTX_PUTTING:          /* optimized PUT sent */
-        case KTX_SENDING:          /* normal send */
-                lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
-                status0  = ktx->ktx_status;
-                break;
-
-        case KTX_GETTING:          /* optimized GET sent & payload received */
-                /* Complete the GET with success since we can't avoid
-                 * delivering a REPLY event; we committed to it when we
-                 * launched the GET */
-                lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
-                status0  = 0;
-                lnetmsg1 = (lnet_msg_t *)ktx->ktx_args[2];
-                status1  = ktx->ktx_status;
-#if KQSW_CKSUM
-                if (status1 == 0) {             /* RDMA succeeded */
-                        lnet_msg_t   *lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1];
-                        lnet_libmd_t *md = lnetmsg0->msg_md;
-                        __u32         csum;
-                
-                        csum = ((md->md_options & LNET_MD_KIOV) != 0) ? 
-                               kqswnal_csum_kiov(~0, 0,
-                                                 md->md_length,
-                                                 md->md_niov, 
-                                                 md->md_iov.kiov) :
-                               kqswnal_csum_iov(~0, 0,
-                                                md->md_length,
-                                                md->md_niov,
-                                                md->md_iov.iov);
-
-                        if (csum != ktx->ktx_cksum) {
-                                CERROR("RDMA checksum failed %u(%u) from %s\n",
-                                       csum, ktx->ktx_cksum,
-                                       libcfs_nid2str(ktx->ktx_nid));
-                                status1 = -EIO;
-                        }
-                }
-#endif                
-                break;
-
-        default:
-                LASSERT (0);
-        }
-
-        kqswnal_put_idle_tx (ktx);
-
-        lnet_finalize (kqswnal_data.kqn_ni, lnetmsg0, status0);
-        if (lnetmsg1 != NULL)
-                lnet_finalize (kqswnal_data.kqn_ni, lnetmsg1, status1);
-}
-
-void
-kqswnal_tx_done (kqswnal_tx_t *ktx, int status)
-{
-       unsigned long      flags;
-
-       ktx->ktx_status = status;
-
-       if (!in_interrupt()) {
-               kqswnal_tx_done_in_thread_context(ktx);
-               return;
-       }
-
-       /* Complete the send in thread context */
-       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-
-       cfs_list_add_tail(&ktx->ktx_schedlist,
-                          &kqswnal_data.kqn_donetxds);
-       wake_up(&kqswnal_data.kqn_sched_waitq);
-
-       spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags);
-}
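kqswnal_tx_done() shows the usual split between interrupt and thread context: finalize immediately when sleeping is allowed, otherwise queue the descriptor and wake the scheduler. A minimal sketch of that hand-off, assuming a worker thread drains the list; the condition variable plays the role of kqn_sched_waitq, and the push is LIFO rather than the tail-add of the original.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct done_item {
        struct done_item *next;
};

static pthread_mutex_t   sched_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t    sched_wait = PTHREAD_COND_INITIALIZER;
static struct done_item *done_head;

/* Placeholder for kqswnal_tx_done_in_thread_context(). */
static void
handle_completion(struct done_item *it)
{
        (void)it;
}

static void
tx_done(struct done_item *it, bool may_sleep)
{
        if (may_sleep) {                /* analogue of !in_interrupt() */
                handle_completion(it);
                return;
        }

        /* Too restrictive a context: push onto the done list and wake the
         * scheduler thread to finish the completion there. */
        pthread_mutex_lock(&sched_lock);
        it->next = done_head;
        done_head = it;
        pthread_cond_signal(&sched_wait);
        pthread_mutex_unlock(&sched_lock);
}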
-
-static void
-kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
-{
-        kqswnal_tx_t         *ktx = (kqswnal_tx_t *)arg;
-        kqswnal_rpc_reply_t  *reply;
-
-        LASSERT (txd != NULL);
-        LASSERT (ktx != NULL);
-
-        CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status);
-
-        if (status != EP_SUCCESS) {
-
-                CNETERR("Tx completion to %s failed: %d\n",
-                        libcfs_nid2str(ktx->ktx_nid), status);
-
-                status = -EHOSTDOWN;
-
-        } else switch (ktx->ktx_state) {
-
-        case KTX_GETTING:
-        case KTX_PUTTING:
-                /* RPC complete! */
-                reply = (kqswnal_rpc_reply_t *)ep_txd_statusblk(txd);
-                if (reply->msg.magic == 0) {    /* "old" peer */
-                        status = reply->msg.status;
-                        break;
-                }
-                
-                if (reply->msg.magic != LNET_PROTO_QSW_MAGIC) {
-                        if (reply->msg.magic != swab32(LNET_PROTO_QSW_MAGIC)) {
-                                CERROR("%s unexpected rpc reply magic %08x\n",
-                                       libcfs_nid2str(ktx->ktx_nid),
-                                       reply->msg.magic);
-                                status = -EPROTO;
-                                break;
-                        }
-
-                        __swab32s(&reply->msg.status);
-                        __swab32s(&reply->msg.version);
-                        
-                        if (ktx->ktx_state == KTX_GETTING) {
-                                __swab32s(&reply->msg.u.get.len);
-                                __swab32s(&reply->msg.u.get.cksum);
-                        }
-                }
-                        
-                status = reply->msg.status;
-                if (status != 0) {
-                        CERROR("%s RPC status %08x\n",
-                               libcfs_nid2str(ktx->ktx_nid), status);
-                        break;
-                }
-
-                if (ktx->ktx_state == KTX_GETTING) {
-                        lnet_set_reply_msg_len(kqswnal_data.kqn_ni,
-                                               (lnet_msg_t *)ktx->ktx_args[2],
-                                               reply->msg.u.get.len);
-#if KQSW_CKSUM
-                        ktx->ktx_cksum = reply->msg.u.get.cksum;
-#endif
-                }
-                break;
-                
-        case KTX_SENDING:
-                status = 0;
-                break;
-                
-        default:
-                LBUG();
-                break;
-        }
-
-        kqswnal_tx_done(ktx, status);
-}
-
-int
-kqswnal_launch (kqswnal_tx_t *ktx)
-{
-       /* Don't block for transmit descriptor if we're in interrupt context */
-       int   attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
-       int   dest = kqswnal_nid2elanid (ktx->ktx_nid);
-       unsigned long flags;
-       int   rc;
-
-       ktx->ktx_launchtime = cfs_time_current();
-
-        if (kqswnal_data.kqn_shuttingdown)
-                return (-ESHUTDOWN);
-
-        LASSERT (dest >= 0);                    /* must be a peer */
-
-        if (ktx->ktx_nmappedpages != 0)
-                attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail);
-
-        switch (ktx->ktx_state) {
-        case KTX_GETTING:
-        case KTX_PUTTING:
-                if (the_lnet.ln_testprotocompat != 0) {
-                        kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-
-                        /* single-shot proto test:
-                         * Future version queries will use an RPC, so I'll
-                         * co-opt one of the existing ones */
-                        LNET_LOCK();
-                        if ((the_lnet.ln_testprotocompat & 1) != 0) {
-                                msg->kqm_version++;
-                                the_lnet.ln_testprotocompat &= ~1;
-                        }
-                        if ((the_lnet.ln_testprotocompat & 2) != 0) {
-                                msg->kqm_magic = LNET_PROTO_MAGIC;
-                                the_lnet.ln_testprotocompat &= ~2;
-                        }
-                        LNET_UNLOCK();
-                }
-
-                /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
-                 * The other frags are the payload, awaiting RDMA */
-                rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
-                                     ktx->ktx_port, attr,
-                                     kqswnal_txhandler, ktx,
-                                     NULL, ktx->ktx_frags, 1);
-                break;
-
-        case KTX_SENDING:
-                rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
-                                         ktx->ktx_port, attr,
-                                         kqswnal_txhandler, ktx,
-                                         NULL, ktx->ktx_frags, ktx->ktx_nfrag);
-                break;
-
-        default:
-                LBUG();
-                rc = -EINVAL;                   /* no compiler warning please */
-                break;
-        }
-
-        switch (rc) {
-        case EP_SUCCESS: /* success */
-                return (0);
-
-        case EP_ENOMEM: /* can't allocate ep txd => queue for later */
-               spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-
-               cfs_list_add_tail(&ktx->ktx_schedlist,
-                                 &kqswnal_data.kqn_delayedtxds);
-               wake_up(&kqswnal_data.kqn_sched_waitq);
-
-               spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                            flags);
-                return (0);
-
-        default: /* fatal error */
-                CNETERR ("Tx to %s failed: %d\n",
-                        libcfs_nid2str(ktx->ktx_nid), rc);
-                kqswnal_notify_peer_down(ktx);
-                return (-EHOSTUNREACH);
-        }
-}
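kqswnal_launch() treats EP_ENOMEM (no transmit descriptor available right now) as a deferrable condition rather than an error, queueing the tx for the scheduler to relaunch, while any other failure marks the peer down. A small sketch of that decision; the stub functions stand in for the EKC calls, and negative errno values stand in for the EP_* return codes.

#include <errno.h>

struct tx { int queued; };

/* Stubs standing in for ep_transmit_message()/ep_transmit_rpc() and the
 * scheduler queue; only the return-code handling matters here. */
static int  try_transmit(struct tx *t)      { (void)t; return -ENOMEM; }
static void requeue_for_retry(struct tx *t) { t->queued = 1; }
static void notify_peer_down(struct tx *t)  { (void)t; }

static int
launch(struct tx *t)
{
        int rc = try_transmit(t);

        switch (rc) {
        case 0:
                return 0;                       /* on the wire */

        case -ENOMEM:
                requeue_for_retry(t);           /* no descriptor free right now */
                return 0;                       /* caller still sees success */

        default:
                notify_peer_down(t);            /* fatal: report the peer down */
                return -EHOSTUNREACH;
        }
}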
-
-#if 0
-static char *
-hdr_type_string (lnet_hdr_t *hdr)
-{
-        switch (hdr->type) {
-        case LNET_MSG_ACK:
-                return ("ACK");
-        case LNET_MSG_PUT:
-                return ("PUT");
-        case LNET_MSG_GET:
-                return ("GET");
-        case LNET_MSG_REPLY:
-                return ("REPLY");
-        default:
-                return ("<UNKNOWN>");
-        }
-}
-
-static void
-kqswnal_cerror_hdr(lnet_hdr_t * hdr)
-{
-        char *type_str = hdr_type_string (hdr);
-
-        CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str,
-               le32_to_cpu(hdr->payload_length));
-        CERROR("    From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid),
-               le32_to_cpu(hdr->src_pid));
-        CERROR("    To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid),
-               le32_to_cpu(hdr->dest_pid));
-
-        switch (le32_to_cpu(hdr->type)) {
-        case LNET_MSG_PUT:
-                CERROR("    Ptl index %d, ack md "LPX64"."LPX64", "
-                       "match bits "LPX64"\n",
-                       le32_to_cpu(hdr->msg.put.ptl_index),
-                       hdr->msg.put.ack_wmd.wh_interface_cookie,
-                       hdr->msg.put.ack_wmd.wh_object_cookie,
-                       le64_to_cpu(hdr->msg.put.match_bits));
-                CERROR("    offset %d, hdr data "LPX64"\n",
-                       le32_to_cpu(hdr->msg.put.offset),
-                       hdr->msg.put.hdr_data);
-                break;
-
-        case LNET_MSG_GET:
-                CERROR("    Ptl index %d, return md "LPX64"."LPX64", "
-                       "match bits "LPX64"\n",
-                       le32_to_cpu(hdr->msg.get.ptl_index),
-                       hdr->msg.get.return_wmd.wh_interface_cookie,
-                       hdr->msg.get.return_wmd.wh_object_cookie,
-                       hdr->msg.get.match_bits);
-                CERROR("    Length %d, src offset %d\n",
-                       le32_to_cpu(hdr->msg.get.sink_length),
-                       le32_to_cpu(hdr->msg.get.src_offset));
-                break;
-
-        case LNET_MSG_ACK:
-                CERROR("    dst md "LPX64"."LPX64", manipulated length %d\n",
-                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
-                       hdr->msg.ack.dst_wmd.wh_object_cookie,
-                       le32_to_cpu(hdr->msg.ack.mlength));
-                break;
-
-        case LNET_MSG_REPLY:
-                CERROR("    dst md "LPX64"."LPX64"\n",
-                       hdr->msg.reply.dst_wmd.wh_interface_cookie,
-                       hdr->msg.reply.dst_wmd.wh_object_cookie);
-        }
-
-}                               /* end of print_hdr() */
-#endif
-
-int
-kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
-                    int nrfrag, EP_NMD *rfrag)
-{
-        int  i;
-
-        if (nlfrag != nrfrag) {
-                CERROR("Can't cope with unequal # frags: %d local %d remote\n",
-                       nlfrag, nrfrag);
-                return (-EINVAL);
-        }
-        
-        for (i = 0; i < nlfrag; i++)
-                if (lfrag[i].nmd_len != rfrag[i].nmd_len) {
-                        CERROR("Can't cope with unequal frags %d(%d):"
-                               " %d local %d remote\n",
-                               i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len);
-                        return (-EINVAL);
-                }
-        
-        return (0);
-}
-
-kqswnal_remotemd_t *
-kqswnal_get_portalscompat_rmd (kqswnal_rx_t *krx)
-{
-        /* Check that the RMD sent after the "raw" LNET header in a
-         * portals-compatible QSWLND message is OK */
-        char               *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
-        kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + sizeof(lnet_hdr_t));
-
-        /* Note RDMA addresses are sent in native endian-ness in the "old"
-         * portals protocol so no swabbing... */
-
-        if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
-                /* msg too small to discover rmd size */
-                CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
-                        krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
-                return (NULL);
-        }
-
-        if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
-                /* rmd doesn't fit in the incoming message */
-                CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
-                        krx->krx_nob, rmd->kqrmd_nfrag,
-                        (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
-                return (NULL);
-        }
-
-        return (rmd);
-}
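Both this portals-compat check and the QSWLND_MSG_RDMA parsing further down apply the same rule for variable-length wire structures: verify the fixed part fits, read the fragment count, then verify the whole fragment array fits before trusting it. A sketch of that validation with a hypothetical remote_md layout; the real RMD sits after an lnet_hdr_t and its fields differ.

#include <stddef.h>
#include <stdint.h>

struct frag {
        uint32_t addr;
        uint32_t len;
};

struct remote_md {
        uint32_t    nfrag;
        struct frag frag[];             /* flexible array member */
};

/* Return the descriptor only if the whole thing, including the fragment
 * array the sender claims to have sent, fits in the 'nob' bytes received. */
static const struct remote_md *
get_rmd(const char *buffer, size_t nob)
{
        const struct remote_md *rmd = (const struct remote_md *)buffer;

        if (nob < sizeof(*rmd))
                return NULL;            /* too small to read nfrag safely */

        if (nob < sizeof(*rmd) + (size_t)rmd->nfrag * sizeof(rmd->frag[0]))
                return NULL;            /* claimed frag array overruns the message */

        return rmd;
}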
-
-void
-kqswnal_rdma_store_complete (EP_RXD *rxd) 
-{
-        int           status = ep_rxd_status(rxd);
-        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
-        LASSERT (ktx->ktx_state == KTX_RDMA_STORE);
-        LASSERT (krx->krx_rxd == rxd);
-        LASSERT (krx->krx_rpc_reply_needed);
-
-        krx->krx_rpc_reply_needed = 0;
-        kqswnal_rx_decref (krx);
-
-        /* free ktx & finalize() its lnet_msg_t */
-        kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED);
-}
-
-void
-kqswnal_rdma_fetch_complete (EP_RXD *rxd) 
-{
-        /* Completed fetching the PUT/REPLY data */
-        int           status = ep_rxd_status(rxd);
-        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
-        LASSERT (ktx->ktx_state == KTX_RDMA_FETCH);
-        LASSERT (krx->krx_rxd == rxd);
-        /* RPC completes with failure by default */
-        LASSERT (krx->krx_rpc_reply_needed);
-        LASSERT (krx->krx_rpc_reply.msg.status != 0);
-
-        if (status == EP_SUCCESS) {
-                krx->krx_rpc_reply.msg.status = 0;
-                status = 0;
-        } else {
-                /* Abandon RPC since get failed */
-                krx->krx_rpc_reply_needed = 0;
-                status = -ECONNABORTED;
-        }
-
-        /* krx gets decref'd in kqswnal_tx_done_in_thread_context() */
-        LASSERT (krx->krx_state == KRX_PARSE);
-        krx->krx_state = KRX_COMPLETING;
-
-        /* free ktx & finalize() its lnet_msg_t */
-        kqswnal_tx_done(ktx, status);
-}
-
-int
-kqswnal_rdma (kqswnal_rx_t *krx, lnet_msg_t *lntmsg,
-              int type, kqswnal_remotemd_t *rmd,
-              unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
-              unsigned int offset, unsigned int len)
-{
-        kqswnal_tx_t       *ktx;
-        int                 eprc;
-        int                 rc;
-
-        /* Not both mapped and paged payload */
-        LASSERT (iov == NULL || kiov == NULL);
-        /* RPC completes with failure by default */
-        LASSERT (krx->krx_rpc_reply_needed);
-        LASSERT (krx->krx_rpc_reply.msg.status != 0);
-
-        if (len == 0) {
-                /* data got truncated to nothing. */
-                lnet_finalize(kqswnal_data.kqn_ni, lntmsg, 0);
-                /* Let kqswnal_rx_done() complete the RPC with success */
-                krx->krx_rpc_reply.msg.status = 0;
-                return (0);
-        }
-        
-        /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
-           actually sending a portals message with it */
-        ktx = kqswnal_get_idle_tx();
-        if (ktx == NULL) {
-                CERROR ("Can't get txd for RDMA with %s\n",
-                        libcfs_nid2str(kqswnal_rx_nid(krx)));
-                return (-ENOMEM);
-        }
-
-        ktx->ktx_state   = type;
-        ktx->ktx_nid     = kqswnal_rx_nid(krx);
-        ktx->ktx_args[0] = krx;
-        ktx->ktx_args[1] = lntmsg;
-
-       LASSERT (atomic_read(&krx->krx_refcount) > 0);
-        /* Take an extra ref for the completion callback */
-       atomic_inc(&krx->krx_refcount);
-
-        /* Map on the rail the RPC prefers */
-        ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx,
-                                         ep_rxd_railmask(krx->krx_rxd));
-
-        /* Start mapping at offset 0 (we're not mapping any headers) */
-        ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
-        
-        if (kiov != NULL)
-                rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
-        else
-                rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
-
-        if (rc != 0) {
-                CERROR ("Can't map local RDMA data: %d\n", rc);
-                goto out;
-        }
-
-        rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
-                                 rmd->kqrmd_nfrag, rmd->kqrmd_frag);
-        if (rc != 0) {
-                CERROR ("Incompatible RDMA descriptors\n");
-                goto out;
-        }
-
-        switch (type) {
-        default:
-                LBUG();
-                
-        case KTX_RDMA_STORE:
-                krx->krx_rpc_reply.msg.status    = 0;
-                krx->krx_rpc_reply.msg.magic     = LNET_PROTO_QSW_MAGIC;
-                krx->krx_rpc_reply.msg.version   = QSWLND_PROTO_VERSION;
-                krx->krx_rpc_reply.msg.u.get.len = len;
-#if KQSW_CKSUM
-                krx->krx_rpc_reply.msg.u.get.cksum = (kiov != NULL) ?
-                            kqswnal_csum_kiov(~0, offset, len, niov, kiov) :
-                            kqswnal_csum_iov(~0, offset, len, niov, iov);
-                if (*kqswnal_tunables.kqn_inject_csum_error == 4) {
-                        krx->krx_rpc_reply.msg.u.get.cksum++;
-                        *kqswnal_tunables.kqn_inject_csum_error = 0;
-                }
-#endif
-                eprc = ep_complete_rpc(krx->krx_rxd, 
-                                       kqswnal_rdma_store_complete, ktx, 
-                                       &krx->krx_rpc_reply.ep_statusblk, 
-                                       ktx->ktx_frags, rmd->kqrmd_frag, 
-                                       rmd->kqrmd_nfrag);
-                if (eprc != EP_SUCCESS) {
-                        CERROR("can't complete RPC: %d\n", eprc);
-                        /* don't re-attempt RPC completion */
-                        krx->krx_rpc_reply_needed = 0;
-                        rc = -ECONNABORTED;
-                }
-                break;
-                
-        case KTX_RDMA_FETCH:
-                eprc = ep_rpc_get (krx->krx_rxd, 
-                                   kqswnal_rdma_fetch_complete, ktx,
-                                   rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
-                if (eprc != EP_SUCCESS) {
-                        CERROR("ep_rpc_get failed: %d\n", eprc);
-                        /* Don't attempt RPC completion: 
-                         * EKC nuked it when the get failed */
-                        krx->krx_rpc_reply_needed = 0;
-                        rc = -ECONNABORTED;
-                }
-                break;
-        }
-
- out:
-        if (rc != 0) {
-                kqswnal_rx_decref(krx);                 /* drop callback's ref */
-                kqswnal_put_idle_tx (ktx);
-        }
-
-       atomic_dec(&kqswnal_data.kqn_pending_txs);
-        return (rc);
-}
-
-int
-kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
-        lnet_hdr_t       *hdr = &lntmsg->msg_hdr;
-        int               type = lntmsg->msg_type;
-        lnet_process_id_t target = lntmsg->msg_target;
-        int               target_is_router = lntmsg->msg_target_is_router;
-        int               routing = lntmsg->msg_routing;
-        unsigned int      payload_niov = lntmsg->msg_niov;
-        struct iovec     *payload_iov = lntmsg->msg_iov;
-        lnet_kiov_t      *payload_kiov = lntmsg->msg_kiov;
-        unsigned int      payload_offset = lntmsg->msg_offset;
-        unsigned int      payload_nob = lntmsg->msg_len;
-        int               nob;
-        kqswnal_tx_t     *ktx;
-        int               rc;
-
-        /* NB 1. hdr is in network byte order */
-        /*    2. 'private' depends on the message type */
-        
-        CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
-               payload_nob, payload_niov, libcfs_id2str(target));
-
-       LASSERT (payload_nob == 0 || payload_niov > 0);
-       LASSERT (payload_niov <= LNET_MAX_IOV);
-
-       /* It must be OK to kmap() if required */
-       LASSERT (payload_kiov == NULL || !in_interrupt ());
-       /* payload is either all vaddrs or all pages */
-       LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
-       if (kqswnal_nid2elanid (target.nid) < 0) {
-               CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid));
-               return -EIO;
-       }
-
-        /* I may not block for a transmit descriptor if I might block the
-         * router, receiver, or an interrupt handler. */
-        ktx = kqswnal_get_idle_tx();
-        if (ktx == NULL) {
-                CERROR ("Can't get txd for msg type %d for %s\n",
-                        type, libcfs_nid2str(target.nid));
-                return (-ENOMEM);
-        }
-
-        ktx->ktx_state   = KTX_SENDING;
-        ktx->ktx_nid     = target.nid;
-        ktx->ktx_args[0] = private;
-        ktx->ktx_args[1] = lntmsg;
-        ktx->ktx_args[2] = NULL;    /* set when a GET commits to REPLY */
-
-        /* The first frag will be the pre-mapped buffer. */
-        ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-
-        if ((!target_is_router &&               /* target.nid is final dest */
-             !routing &&                        /* I'm the source */
-             type == LNET_MSG_GET &&            /* optimize GET? */
-             *kqswnal_tunables.kqn_optimized_gets != 0 &&
-             lntmsg->msg_md->md_length >= 
-             *kqswnal_tunables.kqn_optimized_gets) ||
-            ((type == LNET_MSG_PUT ||            /* optimize PUT? */
-              type == LNET_MSG_REPLY) &&         /* optimize REPLY? */
-             *kqswnal_tunables.kqn_optimized_puts != 0 &&
-             payload_nob >= *kqswnal_tunables.kqn_optimized_puts)) {
-                lnet_libmd_t       *md = lntmsg->msg_md;
-                kqswnal_msg_t      *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-                lnet_hdr_t         *mhdr;
-                kqswnal_remotemd_t *rmd;
-
-                /* Optimised path: I send over the Elan vaddrs of the local
-                 * buffers, and my peer DMAs directly to/from them.
-                 *
-                 * First I set up ktx as if it was going to send this
-                 * payload, (it needs to map it anyway).  This fills
-                 * ktx_frags[1] and onward with the network addresses
-                 * of the buffer frags. */
-
-                /* Send an RDMA message */
-                msg->kqm_magic = LNET_PROTO_QSW_MAGIC;
-                msg->kqm_version = QSWLND_PROTO_VERSION;
-                msg->kqm_type = QSWLND_MSG_RDMA;
-
-                mhdr = &msg->kqm_u.rdma.kqrm_hdr;
-                rmd  = &msg->kqm_u.rdma.kqrm_rmd;
-
-                *mhdr = *hdr;
-                nob = (((char *)rmd) - ktx->ktx_buffer);
-
-                if (type == LNET_MSG_GET) {
-                        if ((md->md_options & LNET_MD_KIOV) != 0) 
-                                rc = kqswnal_map_tx_kiov (ktx, 0, md->md_length,
-                                                          md->md_niov, md->md_iov.kiov);
-                        else
-                                rc = kqswnal_map_tx_iov (ktx, 0, md->md_length,
-                                                         md->md_niov, md->md_iov.iov);
-                        ktx->ktx_state = KTX_GETTING;
-                } else {
-                        if (payload_kiov != NULL)
-                                rc = kqswnal_map_tx_kiov(ktx, 0, payload_nob,
-                                                         payload_niov, payload_kiov);
-                        else
-                                rc = kqswnal_map_tx_iov(ktx, 0, payload_nob,
-                                                        payload_niov, payload_iov);
-                        ktx->ktx_state = KTX_PUTTING;
-                }
-
-                if (rc != 0)
-                        goto out;
-
-                rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1;
-                nob += offsetof(kqswnal_remotemd_t,
-                                kqrmd_frag[rmd->kqrmd_nfrag]);
-                LASSERT (nob <= KQSW_TX_BUFFER_SIZE);
-
-                memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1],
-                       rmd->kqrmd_nfrag * sizeof(EP_NMD));
-
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob);
-#if KQSW_CKSUM
-                msg->kqm_nob   = nob + payload_nob;
-                msg->kqm_cksum = 0;
-                msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob);
-#endif
-                if (type == LNET_MSG_GET) {
-                        /* Allocate reply message now while I'm in thread context */
-                        ktx->ktx_args[2] = lnet_create_reply_msg (
-                                kqswnal_data.kqn_ni, lntmsg);
-                        if (ktx->ktx_args[2] == NULL)
-                                goto out;
-
-                        /* NB finalizing the REPLY message is my
-                         * responsibility now, whatever happens. */
-#if KQSW_CKSUM
-                        if (*kqswnal_tunables.kqn_inject_csum_error ==  3) {
-                                msg->kqm_cksum++;
-                                *kqswnal_tunables.kqn_inject_csum_error = 0;
-                        }
-
-                } else if (payload_kiov != NULL) {
-                        /* must checksum payload after header so receiver can
-                         * compute partial header cksum before swab.  Sadly
-                         * this causes 2 rounds of kmap */
-                        msg->kqm_cksum =
-                                kqswnal_csum_kiov(msg->kqm_cksum, 0, payload_nob,
-                                                  payload_niov, payload_kiov);
-                        if (*kqswnal_tunables.kqn_inject_csum_error ==  2) {
-                                msg->kqm_cksum++;
-                                *kqswnal_tunables.kqn_inject_csum_error = 0;
-                        }
-                } else {
-                        msg->kqm_cksum =
-                                kqswnal_csum_iov(msg->kqm_cksum, 0, payload_nob,
-                                                 payload_niov, payload_iov);
-                        if (*kqswnal_tunables.kqn_inject_csum_error ==  2) {
-                                msg->kqm_cksum++;
-                                *kqswnal_tunables.kqn_inject_csum_error = 0;
-                        }
-#endif
-                }
-                
-        } else if (payload_nob <= *kqswnal_tunables.kqn_tx_maxcontig) {
-                lnet_hdr_t    *mhdr;
-                char          *payload;
-                kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-
-                /* single frag copied into the pre-mapped buffer */
-                msg->kqm_magic = LNET_PROTO_QSW_MAGIC;
-                msg->kqm_version = QSWLND_PROTO_VERSION;
-                msg->kqm_type = QSWLND_MSG_IMMEDIATE;
-
-                mhdr = &msg->kqm_u.immediate.kqim_hdr;
-                payload = msg->kqm_u.immediate.kqim_payload;
-
-                *mhdr = *hdr;
-                nob = (payload - ktx->ktx_buffer) + payload_nob;
-
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob);
-
-                if (payload_kiov != NULL)
-                        lnet_copy_kiov2flat(KQSW_TX_BUFFER_SIZE, payload, 0,
-                                            payload_niov, payload_kiov, 
-                                            payload_offset, payload_nob);
-                else
-                        lnet_copy_iov2flat(KQSW_TX_BUFFER_SIZE, payload, 0,
-                                           payload_niov, payload_iov, 
-                                           payload_offset, payload_nob);
-#if KQSW_CKSUM
-                msg->kqm_nob   = nob;
-                msg->kqm_cksum = 0;
-                msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob);
-                if (*kqswnal_tunables.kqn_inject_csum_error == 1) {
-                        msg->kqm_cksum++;
-                        *kqswnal_tunables.kqn_inject_csum_error = 0;
-                }
-#endif
-        } else {
-                lnet_hdr_t    *mhdr;
-                kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer;
-
-                /* multiple frags: first is hdr in pre-mapped buffer */
-                msg->kqm_magic = LNET_PROTO_QSW_MAGIC;
-                msg->kqm_version = QSWLND_PROTO_VERSION;
-                msg->kqm_type = QSWLND_MSG_IMMEDIATE;
-
-                mhdr = &msg->kqm_u.immediate.kqim_hdr;
-                nob = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload);
-
-                *mhdr = *hdr;
-
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob);
-
-                if (payload_kiov != NULL)
-                        rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob, 
-                                                  payload_niov, payload_kiov);
-                else
-                        rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob,
-                                                 payload_niov, payload_iov);
-                if (rc != 0)
-                        goto out;
-
-#if KQSW_CKSUM
-                msg->kqm_nob   = nob + payload_nob;
-                msg->kqm_cksum = 0;
-                msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob);
-
-                msg->kqm_cksum = (payload_kiov != NULL) ?
-                                 kqswnal_csum_kiov(msg->kqm_cksum,
-                                                   payload_offset, payload_nob,
-                                                   payload_niov, payload_kiov) :
-                                 kqswnal_csum_iov(msg->kqm_cksum,
-                                                  payload_offset, payload_nob,
-                                                  payload_niov, payload_iov);
-
-                if (*kqswnal_tunables.kqn_inject_csum_error == 1) {
-                        msg->kqm_cksum++;
-                        *kqswnal_tunables.kqn_inject_csum_error = 0;
-                }
-#endif
-                nob += payload_nob;
-        }
-
-        ktx->ktx_port = (nob <= KQSW_SMALLMSG) ?
-                        EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
-
-        rc = kqswnal_launch (ktx);
-
- out:
-        CDEBUG_LIMIT(rc == 0? D_NET :D_NETERROR, "%s %d bytes to %s%s: rc %d\n",
-                     routing ? (rc == 0 ? "Routed" : "Failed to route") :
-                               (rc == 0 ? "Sent" : "Failed to send"),
-                     nob, libcfs_nid2str(target.nid),
-                     target_is_router ? "(router)" : "", rc);
-
-        if (rc != 0) {
-                lnet_msg_t *repmsg = (lnet_msg_t *)ktx->ktx_args[2];
-                int         state = ktx->ktx_state;
-
-                kqswnal_put_idle_tx (ktx);
-
-                if (state == KTX_GETTING && repmsg != NULL) {
-                        /* We committed to reply, but there was a problem
-                         * launching the GET.  We can't avoid delivering a
-                         * REPLY event since we committed above, so we
-                         * pretend the GET succeeded but the REPLY
-                         * failed. */
-                        rc = 0;
-                        lnet_finalize (kqswnal_data.kqn_ni, lntmsg, 0);
-                        lnet_finalize (kqswnal_data.kqn_ni, repmsg, -EIO);
-                }
-                
-        }
-        
-       atomic_dec(&kqswnal_data.kqn_pending_txs);
-        return (rc == 0 ? 0 : -EIO);
-}
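kqswnal_send() picks one of three transmit strategies: an optimised RDMA exchange for large GET/PUT/REPLY payloads, a single copy into the pre-mapped buffer when the payload fits kqn_tx_maxcontig, and header-plus-mapped-fragments otherwise. The sketch below captures only that size-based choice; it deliberately ignores the routing/target_is_router conditions and the separate GET versus PUT thresholds, and the parameter names are placeholders for the module tunables.

enum send_strategy { SEND_RDMA, SEND_IMMEDIATE_COPY, SEND_MAPPED_FRAGS };

static enum send_strategy
choose_strategy(unsigned int payload_nob, unsigned int rdma_threshold,
                unsigned int tx_maxcontig)
{
        if (rdma_threshold != 0 && payload_nob >= rdma_threshold)
                return SEND_RDMA;               /* peer DMAs straight to/from our buffers */

        if (payload_nob <= tx_maxcontig)
                return SEND_IMMEDIATE_COPY;     /* copy payload into the pre-mapped buffer */

        return SEND_MAPPED_FRAGS;               /* header frag + mapped payload frags */
}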
-
-void
-kqswnal_requeue_rx (kqswnal_rx_t *krx)
-{
-       LASSERT (atomic_read(&krx->krx_refcount) == 0);
-        LASSERT (!krx->krx_rpc_reply_needed);
-
-        krx->krx_state = KRX_POSTED;
-
-        if (kqswnal_data.kqn_shuttingdown) {
-                /* free EKC rxd on shutdown */
-                ep_complete_receive(krx->krx_rxd);
-        } else {
-                /* repost receive */
-                ep_requeue_receive(krx->krx_rxd, 
-                                   kqswnal_rxhandler, krx,
-                                   &krx->krx_elanbuffer, 0);
-        }
-}
-
-void
-kqswnal_rpc_complete (EP_RXD *rxd)
-{
-        int           status = ep_rxd_status(rxd);
-        kqswnal_rx_t *krx    = (kqswnal_rx_t *)ep_rxd_arg(rxd);
-        
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, krx %p, status %d\n", rxd, krx, status);
-
-        LASSERT (krx->krx_rxd == rxd);
-        LASSERT (krx->krx_rpc_reply_needed);
-        
-        krx->krx_rpc_reply_needed = 0;
-        kqswnal_requeue_rx (krx);
-}
-
-void
-kqswnal_rx_done (kqswnal_rx_t *krx) 
-{
-       int           rc;
-
-       LASSERT (atomic_read(&krx->krx_refcount) == 0);
-
-       if (krx->krx_rpc_reply_needed) {
-               /* We've not completed the peer's RPC yet... */
-               krx->krx_rpc_reply.msg.magic   = LNET_PROTO_QSW_MAGIC;
-               krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION;
-
-               LASSERT (!in_interrupt());
-
-               rc = ep_complete_rpc(krx->krx_rxd,
-                                    kqswnal_rpc_complete, krx,
-                                    &krx->krx_rpc_reply.ep_statusblk,
-                                    NULL, NULL, 0);
-               if (rc == EP_SUCCESS)
-                       return;
-
-               CERROR("can't complete RPC: %d\n", rc);
-               krx->krx_rpc_reply_needed = 0;
-       }
-
-       kqswnal_requeue_rx(krx);
-}
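Receive descriptors are reference counted; kqswnal_rx_decref() is outside this hunk, but the pattern is the usual "run the completion on the last put", which is what lets kqswnal_rx_done() assert a zero refcount on entry. A minimal sketch with C11 atomics, where rx_done() is just a placeholder for the requeue/RPC-completion logic above.

#include <stdatomic.h>

struct rx_desc {
        atomic_int refcount;
};

static void
rx_done(struct rx_desc *rx)
{
        (void)rx;                       /* requeue or complete the RPC, as above */
}

static void
rx_decref(struct rx_desc *rx)
{
        /* fetch_sub returns the old value: 1 means this was the last reference */
        if (atomic_fetch_sub(&rx->refcount, 1) == 1)
                rx_done(rx);
}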
-        
-void
-kqswnal_parse (kqswnal_rx_t *krx)
-{
-        lnet_ni_t      *ni = kqswnal_data.kqn_ni;
-        kqswnal_msg_t  *msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page);
-        lnet_nid_t      fromnid = kqswnal_rx_nid(krx);
-        int             swab;
-        int             n;
-        int             i;
-        int             nob;
-        int             rc;
-
-       LASSERT (atomic_read(&krx->krx_refcount) == 1);
-
-        if (krx->krx_nob < offsetof(kqswnal_msg_t, kqm_u)) {
-                CERROR("Short message %d received from %s\n",
-                       krx->krx_nob, libcfs_nid2str(fromnid));
-                goto done;
-        }
-
-        swab = msg->kqm_magic == __swab32(LNET_PROTO_QSW_MAGIC);
-
-        if (swab || msg->kqm_magic == LNET_PROTO_QSW_MAGIC) {
-#if KQSW_CKSUM
-                __u32 csum0;
-                __u32 csum1;
-
-                /* csum byte array before swab */
-                csum1 = msg->kqm_cksum;
-                msg->kqm_cksum = 0;
-                csum0 = kqswnal_csum_kiov(~0, 0, krx->krx_nob,
-                                          krx->krx_npages, krx->krx_kiov);
-                msg->kqm_cksum = csum1;
-#endif
-
-                if (swab) {
-                        __swab16s(&msg->kqm_version);
-                        __swab16s(&msg->kqm_type);
-#if KQSW_CKSUM
-                        __swab32s(&msg->kqm_cksum);
-                        __swab32s(&msg->kqm_nob);
-#endif
-                }
-
-                if (msg->kqm_version != QSWLND_PROTO_VERSION) {
-                        /* Future protocol version compatibility support!
-                         * The next qswlnd-specific protocol rev will first
-                         * send an RPC to check version.
-                         * 1.4.6 and 1.4.7.early reply with a status
-                         * block containing its current version.
-                         * Later versions send a failure (-ve) status +
-                         * magic/version */
-
-                        if (!krx->krx_rpc_reply_needed) {
-                                CERROR("Unexpected version %d from %s\n",
-                                       msg->kqm_version, libcfs_nid2str(fromnid));
-                                goto done;
-                        }
-
-                        LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO);
-                        goto done;
-                }
-
-                switch (msg->kqm_type) {
-                default:
-                        CERROR("Bad request type %x from %s\n",
-                               msg->kqm_type, libcfs_nid2str(fromnid));
-                        goto done;
-
-                case QSWLND_MSG_IMMEDIATE:
-                        if (krx->krx_rpc_reply_needed) {
-                                /* Should have been a simple message */
-                                CERROR("IMMEDIATE sent as RPC from %s\n",
-                                       libcfs_nid2str(fromnid));
-                                goto done;
-                        }
-
-                        nob = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload);
-                        if (krx->krx_nob < nob) {
-                                CERROR("Short IMMEDIATE %d(%d) from %s\n",
-                                       krx->krx_nob, nob, libcfs_nid2str(fromnid));
-                                goto done;
-                        }
-
-#if KQSW_CKSUM
-                        if (csum0 != msg->kqm_cksum) {
-                                CERROR("Bad IMMEDIATE checksum %08x(%08x) from %s\n",
-                                       csum0, msg->kqm_cksum, libcfs_nid2str(fromnid));
-                                CERROR("nob %d (%d)\n", krx->krx_nob, msg->kqm_nob);
-                                goto done;
-                        }
-#endif
-                        rc = lnet_parse(ni, &msg->kqm_u.immediate.kqim_hdr,
-                                        fromnid, krx, 0);
-                        if (rc < 0)
-                                goto done;
-                        return;
-
-                case QSWLND_MSG_RDMA:
-                        if (!krx->krx_rpc_reply_needed) {
-                                /* Should have been a simple message */
-                                CERROR("RDMA sent as simple message from %s\n",
-                                       libcfs_nid2str(fromnid));
-                                goto done;
-                        }
-
-                        nob = offsetof(kqswnal_msg_t,
-                                       kqm_u.rdma.kqrm_rmd.kqrmd_frag[0]);
-                        if (krx->krx_nob < nob) {
-                                CERROR("Short RDMA message %d(%d) from %s\n",
-                                       krx->krx_nob, nob, libcfs_nid2str(fromnid));
-                                goto done;
-                        }
-
-                        if (swab)
-                                __swab32s(&msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag);
-
-                        n = msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag;
-                        nob = offsetof(kqswnal_msg_t,
-                                       kqm_u.rdma.kqrm_rmd.kqrmd_frag[n]);
-
-                        if (krx->krx_nob < nob) {
-                                CERROR("short RDMA message %d(%d) from %s\n",
-                                       krx->krx_nob, nob, libcfs_nid2str(fromnid));
-                                goto done;
-                        }
-
-                        if (swab) {
-                                for (i = 0; i < n; i++) {
-                                        EP_NMD *nmd = &msg->kqm_u.rdma.kqrm_rmd.kqrmd_frag[i];
-
-                                        __swab32s(&nmd->nmd_addr);
-                                        __swab32s(&nmd->nmd_len);
-                                        __swab32s(&nmd->nmd_attr);
-                                }
-                        }
-
-#if KQSW_CKSUM
-                        krx->krx_cksum = csum0; /* stash checksum so far */
-#endif
-                        rc = lnet_parse(ni, &msg->kqm_u.rdma.kqrm_hdr,
-                                        fromnid, krx, 1);
-                        if (rc < 0)
-                                goto done;
-                        return;
-                }
-                /* Not Reached */
-        }
-
-        if (msg->kqm_magic == LNET_PROTO_MAGIC ||
-            msg->kqm_magic == __swab32(LNET_PROTO_MAGIC)) {
-                /* Future protocol version compatibility support!
-                 * When LNET unifies protocols over all LNDs, the first thing a
-                 * peer will send will be a version query RPC.
-                 * 1.4.6 and 1.4.7.early reply with a status block containing
-                 * LNET_PROTO_QSW_MAGIC.
-                 * Later versions send a failure (-ve) status +
-                 * magic/version */
-
-                if (!krx->krx_rpc_reply_needed) {
-                        CERROR("Unexpected magic %08x from %s\n",
-                               msg->kqm_magic, libcfs_nid2str(fromnid));
-                        goto done;
-                }
-
-                LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO);
-                goto done;
-        }
-
-        CERROR("Unrecognised magic %08x from %s\n",
-               msg->kqm_magic, libcfs_nid2str(fromnid));
- done:
-        kqswnal_rx_decref(krx);
-}
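The first thing kqswnal_parse() does is classify the sender by magic: a match in native order needs no swabbing, a match against the byte-swapped magic means every multi-byte field gets swabbed once, and anything else is rejected or treated as a cross-LND version probe. A compact sketch of that test on a hypothetical header; the magic value and field set are made up for the example.

#include <stdbool.h>
#include <stdint.h>

#define PROTO_MAGIC 0x51535721u         /* made-up value, for the sketch only */

struct wire_hdr {
        uint32_t magic;
        uint16_t version;
        uint16_t type;
};

static uint32_t
swab32(uint32_t v)
{
        return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
               ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
}

static uint16_t
swab16(uint16_t v)
{
        return (uint16_t)((v >> 8) | (v << 8));
}

/* Returns false for an unrecognised magic; on success the header fields are
 * in host byte order whichever endian-ness the peer has. */
static bool
parse_hdr(struct wire_hdr *h)
{
        bool swab = (h->magic == swab32(PROTO_MAGIC));

        if (!swab && h->magic != PROTO_MAGIC)
                return false;

        if (swab) {
                h->magic   = PROTO_MAGIC;
                h->version = swab16(h->version);
                h->type    = swab16(h->type);
        }
        return true;
}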
-
-/* Receive Interrupt Handler: posts to schedulers */
-void 
-kqswnal_rxhandler(EP_RXD *rxd)
-{
-        unsigned long flags;
-        int           nob    = ep_rxd_len (rxd);
-        int           status = ep_rxd_status (rxd);
-        kqswnal_rx_t *krx    = (kqswnal_rx_t *)ep_rxd_arg (rxd);
-        CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n",
-               rxd, krx, nob, status);
-
-        LASSERT (krx != NULL);
-        LASSERT (krx->krx_state == KRX_POSTED);
-        
-        krx->krx_state = KRX_PARSE;
-        krx->krx_rxd = rxd;
-        krx->krx_nob = nob;
-
-        /* RPC reply iff rpc request received without error */
-        krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd) &&
-                                    (status == EP_SUCCESS ||
-                                     status == EP_MSG_TOO_BIG);
-
-        /* Default to failure if an RPC reply is requested but not handled */
-        krx->krx_rpc_reply.msg.status = -EPROTO;
-       atomic_set (&krx->krx_refcount, 1);
-
-        if (status != EP_SUCCESS) {
-                /* receives complete with failure when receiver is removed */
-                if (status == EP_SHUTDOWN)
-                        LASSERT (kqswnal_data.kqn_shuttingdown);
-                else
-                        CERROR("receive status failed with status %d nob %d\n",
-                               ep_rxd_status(rxd), nob);
-               kqswnal_rx_decref(krx);
-               return;
-       }
-
-       if (!in_interrupt()) {
-               kqswnal_parse(krx);
-               return;
-       }
-
-       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-
-       cfs_list_add_tail(&krx->krx_list, &kqswnal_data.kqn_readyrxds);
-       wake_up(&kqswnal_data.kqn_sched_waitq);
-
-       spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags);
-}
-
-int
-kqswnal_recv (lnet_ni_t     *ni,
-              void          *private,
-              lnet_msg_t    *lntmsg,
-              int            delayed,
-              unsigned int   niov,
-              struct iovec  *iov,
-              lnet_kiov_t   *kiov,
-              unsigned int   offset,
-              unsigned int   mlen,
-              unsigned int   rlen)
-{
-       kqswnal_rx_t       *krx = (kqswnal_rx_t *)private;
-       lnet_nid_t          fromnid;
-       kqswnal_msg_t      *msg;
-       lnet_hdr_t         *hdr;
-       kqswnal_remotemd_t *rmd;
-       int                 msg_offset;
-       int                 rc;
-
-       LASSERT (!in_interrupt ());             /* OK to map */
-       /* Either all pages or all vaddrs */
-       LASSERT (!(kiov != NULL && iov != NULL));
-
-        fromnid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ep_rxd_node(krx->krx_rxd));
-        msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page);
-
-        if (krx->krx_rpc_reply_needed) {
-                /* optimized (rdma) request sent as RPC */
-
-                LASSERT (msg->kqm_type == QSWLND_MSG_RDMA);
-                hdr = &msg->kqm_u.rdma.kqrm_hdr;
-                rmd = &msg->kqm_u.rdma.kqrm_rmd;
-
-                /* NB header is still in wire byte order */
-
-                switch (le32_to_cpu(hdr->type)) {
-                        case LNET_MSG_PUT:
-                        case LNET_MSG_REPLY:
-                                /* This is an optimized PUT/REPLY */
-                                rc = kqswnal_rdma(krx, lntmsg, 
-                                                  KTX_RDMA_FETCH, rmd,
-                                                  niov, iov, kiov, offset, mlen);
-                                break;
-
-                        case LNET_MSG_GET:
-#if KQSW_CKSUM
-                                if (krx->krx_cksum != msg->kqm_cksum) {
-                                        CERROR("Bad GET checksum %08x(%08x) from %s\n",
-                                               krx->krx_cksum, msg->kqm_cksum,
-                                               libcfs_nid2str(fromnid));
-                                        rc = -EIO;
-                                        break;
-                                }
-#endif                                
-                                if (lntmsg == NULL) {
-                                        /* No buffer match: my decref will
-                                         * complete the RPC with failure */
-                                        rc = 0;
-                                } else {
-                                        /* Matched something! */
-                                        rc = kqswnal_rdma(krx, lntmsg,
-                                                          KTX_RDMA_STORE, rmd,
-                                                          lntmsg->msg_niov,
-                                                          lntmsg->msg_iov,
-                                                          lntmsg->msg_kiov,
-                                                          lntmsg->msg_offset,
-                                                          lntmsg->msg_len);
-                                }
-                                break;
-
-                        default:
-                                CERROR("Bad RPC type %d\n",
-                                       le32_to_cpu(hdr->type));
-                                rc = -EPROTO;
-                                break;
-                }
-
-                kqswnal_rx_decref(krx);
-                return rc;
-        }
-
-        LASSERT (msg->kqm_type == QSWLND_MSG_IMMEDIATE);
-        msg_offset = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload);
-        
-        if (krx->krx_nob < msg_offset + rlen) {
-                CERROR("Bad message size from %s: have %d, need %d + %d\n",
-                       libcfs_nid2str(fromnid), krx->krx_nob,
-                       msg_offset, rlen);
-                kqswnal_rx_decref(krx);
-                return -EPROTO;
-        }
-
-        if (kiov != NULL)
-                lnet_copy_kiov2kiov(niov, kiov, offset,
-                                    krx->krx_npages, krx->krx_kiov, 
-                                    msg_offset, mlen);
-        else
-                lnet_copy_kiov2iov(niov, iov, offset,
-                                   krx->krx_npages, krx->krx_kiov, 
-                                   msg_offset, mlen);
-
-        lnet_finalize(ni, lntmsg, 0);
-        kqswnal_rx_decref(krx);
-        return 0;
-}
-
-int
-kqswnal_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-       struct task_struct *task = cfs_thread_run(fn, arg, name);
-
-       if (IS_ERR(task))
-               return PTR_ERR(task);
-
-       atomic_inc(&kqswnal_data.kqn_nthreads);
-       return 0;
-}
-
-void
-kqswnal_thread_fini (void)
-{
-       atomic_dec (&kqswnal_data.kqn_nthreads);
-}
-
-int
-kqswnal_scheduler (void *arg)
-{
-        kqswnal_rx_t    *krx;
-        kqswnal_tx_t    *ktx;
-        unsigned long    flags;
-        int              rc;
-        int              counter = 0;
-        int              did_something;
-
-        cfs_block_allsigs ();
-
-       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-
-        for (;;)
-        {
-                did_something = 0;
-
-                if (!cfs_list_empty (&kqswnal_data.kqn_readyrxds))
-                {
-                        krx = cfs_list_entry(kqswnal_data.kqn_readyrxds.next,
-                                             kqswnal_rx_t, krx_list);
-                        cfs_list_del (&krx->krx_list);
-                       spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                                   flags);
-
-                        LASSERT (krx->krx_state == KRX_PARSE);
-                        kqswnal_parse (krx);
-
-                        did_something = 1;
-                       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
-                                              flags);
-                }
-
-                if (!cfs_list_empty (&kqswnal_data.kqn_donetxds))
-                {
-                        ktx = cfs_list_entry(kqswnal_data.kqn_donetxds.next,
-                                             kqswnal_tx_t, ktx_schedlist);
-                        cfs_list_del_init (&ktx->ktx_schedlist);
-                       spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                                   flags);
-
-                        kqswnal_tx_done_in_thread_context(ktx);
-
-                        did_something = 1;
-                       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
-                                               flags);
-                }
-
-                if (!cfs_list_empty (&kqswnal_data.kqn_delayedtxds))
-                {
-                        ktx = cfs_list_entry(kqswnal_data.kqn_delayedtxds.next,
-                                             kqswnal_tx_t, ktx_schedlist);
-                        cfs_list_del_init (&ktx->ktx_schedlist);
-                       spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                                   flags);
-
-                        rc = kqswnal_launch (ktx);
-                        if (rc != 0) {
-                                CERROR("Failed delayed transmit to %s: %d\n", 
-                                       libcfs_nid2str(ktx->ktx_nid), rc);
-                                kqswnal_tx_done (ktx, rc);
-                        }
-                       atomic_dec (&kqswnal_data.kqn_pending_txs);
-
-                        did_something = 1;
-                       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
-                                               flags);
-                }
-
-                /* nothing to do or hogging CPU */
-                if (!did_something || counter++ == KQSW_RESCHED) {
-                       spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                                   flags);
-
-                        counter = 0;
-
-                        if (!did_something) {
-                                if (kqswnal_data.kqn_shuttingdown == 2) {
-                                        /* We only exit in stage 2 of shutdown
-                                         * when there's nothing left to do */
-                                        break;
-                                }
-                               rc = wait_event_interruptible_exclusive (
-                                       kqswnal_data.kqn_sched_waitq,
-                                       kqswnal_data.kqn_shuttingdown == 2 ||
-                                       !cfs_list_empty(&kqswnal_data. \
-                                                       kqn_readyrxds) ||
-                                       !cfs_list_empty(&kqswnal_data. \
-                                                       kqn_donetxds) ||
-                                       !cfs_list_empty(&kqswnal_data. \
-                                                       kqn_delayedtxds));
-                               LASSERT (rc == 0);
-                       } else if (need_resched())
-                               schedule ();
-
-                       spin_lock_irqsave(&kqswnal_data.kqn_sched_lock,
-                                              flags);
-               }
-       }
-
-       kqswnal_thread_fini ();
-       return 0;
-}
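The qswlnd_cb.c hunk above ends with the driver's receive path: kqswnal_rxhandler() runs in interrupt context, so it only queues the krx on kqn_readyrxds under kqn_sched_lock and wakes kqswnal_scheduler(), which drains the ready/done/delayed lists in thread context until stage 2 of shutdown. For reference, a minimal kernel-style sketch of that defer-to-thread pattern follows; it is illustrative only, and every name in it (demo_work, demo_scheduler, and so on) is hypothetical rather than part of the removed driver.

/* Illustrative only -- not code from the tree.  Interrupt context defers
 * work onto a spinlock-protected list and wakes a scheduler thread, which
 * drains the list until shutdown, as the removed qswlnd scheduler did. */
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct demo_work {
	struct list_head dw_list;		/* link on demo_readyq */
};

static LIST_HEAD(demo_readyq);
static DEFINE_SPINLOCK(demo_lock);
static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
static int demo_shuttingdown;

/* called from interrupt context: queue the work and wake the thread */
static void demo_defer(struct demo_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_lock, flags);
	list_add_tail(&work->dw_list, &demo_readyq);
	wake_up(&demo_waitq);
	spin_unlock_irqrestore(&demo_lock, flags);
}

/* scheduler thread body: process queued work, sleep when idle */
static int demo_scheduler(void *arg)
{
	struct demo_work *work;
	unsigned long flags;

	spin_lock_irqsave(&demo_lock, flags);
	while (!demo_shuttingdown || !list_empty(&demo_readyq)) {
		if (list_empty(&demo_readyq)) {
			spin_unlock_irqrestore(&demo_lock, flags);
			wait_event(demo_waitq,
				   demo_shuttingdown ||
				   !list_empty(&demo_readyq));
			spin_lock_irqsave(&demo_lock, flags);
			continue;
		}

		work = list_first_entry(&demo_readyq, struct demo_work, dw_list);
		list_del(&work->dw_list);
		spin_unlock_irqrestore(&demo_lock, flags);

		/* ... handle 'work' outside the lock, as kqswnal_parse() did ... */

		spin_lock_irqsave(&demo_lock, flags);
	}
	spin_unlock_irqrestore(&demo_lock, flags);
	return 0;
}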
diff --git a/lnet/klnds/qswlnd/qswlnd_modparams.c b/lnet/klnds/qswlnd/qswlnd_modparams.c
deleted file mode 100644 (file)
index f3dcfbf..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- *
- * This file is part of Portals, http://www.lustre.org
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "qswlnd.h"
-
-static int tx_maxcontig = (1<<10);
-CFS_MODULE_PARM(tx_maxcontig, "i", int, 0444,
-                "maximum payload to de-fragment");
-
-static int ntxmsgs = 512;
-CFS_MODULE_PARM(ntxmsgs, "i", int, 0444,
-                "# tx msg buffers");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
-                "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
-                "# per-peer concurrent sends");
-
-static int nrxmsgs_large = 64;
-CFS_MODULE_PARM(nrxmsgs_large, "i", int, 0444,
-                "# 'large' rx msg buffers");
-
-static int ep_envelopes_large = 256;
-CFS_MODULE_PARM(ep_envelopes_large, "i", int, 0444,
-                "# 'large' rx msg envelope buffers");
-
-static int nrxmsgs_small = 256;
-CFS_MODULE_PARM(nrxmsgs_small, "i", int, 0444,
-                "# 'small' rx msg buffers");
-
-static int ep_envelopes_small = 2048;
-CFS_MODULE_PARM(ep_envelopes_small, "i", int, 0444,
-                "# 'small' rx msg envelope buffers");
-
-static int optimized_puts = (32<<10);
-CFS_MODULE_PARM(optimized_puts, "i", int, 0644,
-                "zero-copy puts >= this size");
-
-static int optimized_gets = 2048;
-CFS_MODULE_PARM(optimized_gets, "i", int, 0644,
-                "zero-copy gets >= this size");
-
-#if KQSW_CKSUM
-static int inject_csum_error = 0;
-CFS_MODULE_PARM(inject_csum_error, "i", int, 0644,
-                "test checksumming");
-#endif
-
-kqswnal_tunables_t kqswnal_tunables = {
-        .kqn_tx_maxcontig       = &tx_maxcontig,
-        .kqn_ntxmsgs            = &ntxmsgs,
-        .kqn_credits            = &credits,
-        .kqn_peercredits        = &peer_credits,
-        .kqn_nrxmsgs_large      = &nrxmsgs_large,
-        .kqn_ep_envelopes_large = &ep_envelopes_large,
-        .kqn_nrxmsgs_small      = &nrxmsgs_small,
-        .kqn_ep_envelopes_small = &ep_envelopes_small,
-        .kqn_optimized_puts     = &optimized_puts,
-        .kqn_optimized_gets     = &optimized_gets,
-#if KQSW_CKSUM
-        .kqn_inject_csum_error  = &inject_csum_error,
-#endif
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-static struct ctl_table kqswnal_ctl_table[] = {
-       {
-               INIT_CTL_NAME
-               .procname       = "tx_maxcontig",
-               .data           = &tx_maxcontig,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "ntxmsgs",
-               .data           = &ntxmsgs,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "credits",
-               .data           = &credits,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "peer_credits",
-               .data           = &peer_credits,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "nrxmsgs_large",
-               .data           = &nrxmsgs_large,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "ep_envelopes_large",
-               .data           = &ep_envelopes_large,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "nrxmsgs_small",
-               .data           = &nrxmsgs_small,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "ep_envelopes_small",
-               .data           = &ep_envelopes_small,
-               .maxlen         = sizeof (int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "optimized_puts",
-               .data           = &optimized_puts,
-               .maxlen         = sizeof (int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "optimized_gets",
-               .data           = &optimized_gets,
-               .maxlen         = sizeof (int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-#if KQSW_CKSUM
-       {
-               INIT_CTL_NAME
-               .procname       = "inject_csum_error",
-               .data           = &inject_csum_error,
-               .maxlen         = sizeof (int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-#endif
-       { 0 }
-};
-
-static struct ctl_table kqswnal_top_ctl_table[] = {
-       {
-               INIT_CTL_NAME
-               .procname       = "qswnal",
-               .data           = NULL,
-               .maxlen         = 0,
-               .mode           = 0555,
-               .child          = kqswnal_ctl_table
-       },
-       { 0 }
-};
-
-int
-kqswnal_tunables_init ()
-{
-       kqswnal_tunables.kqn_sysctl =
-               register_sysctl_table(kqswnal_top_ctl_table);
-
-       if (kqswnal_tunables.kqn_sysctl == NULL)
-               CWARN("Can't setup /proc tunables\n");
-
-       return 0;
-}
-
-void kqswnal_tunables_fini()
-{
-       if (kqswnal_tunables.kqn_sysctl != NULL)
-               unregister_sysctl_table(kqswnal_tunables.kqn_sysctl);
-}
-#else
-int
-kqswnal_tunables_init ()
-{
-        return 0;
-}
-
-void
-kqswnal_tunables_fini ()
-{
-}
-#endif
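qswlnd_modparams.c declared every tunable twice: once through the libcfs CFS_MODULE_PARM wrapper and again in a hand-rolled ctl_table so the values also appeared under /proc/sys (the "qswnal" directory registered by kqswnal_top_ctl_table). With the stock kernel interface a read-only tunable needs only the declaration below; this is an illustrative sketch, not code from the tree.

/* Illustrative only: one of the removed tunables expressed with the
 * standard module-parameter macros instead of CFS_MODULE_PARM + sysctl. */
#include <linux/module.h>
#include <linux/moduleparam.h>

static int credits = 128;
module_param(credits, int, 0444);
MODULE_PARM_DESC(credits, "# concurrent sends");

Declared this way, the value is readable from /sys/module/<module>/parameters/credits with the 0444 permissions given above.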
diff --git a/lnet/klnds/ralnd/Makefile.in b/lnet/klnds/ralnd/Makefile.in
deleted file mode 100644 (file)
index e1f5e82..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := kralnd
-kralnd-objs := ralnd.o ralnd_cb.o ralnd_modparams.o
-
-EXTRA_POST_CFLAGS := @RACPPFLAGS@
-
-@INCLUDE_RULES@
diff --git a/lnet/klnds/ralnd/autoMakefile.am b/lnet/klnds/ralnd/autoMakefile.am
deleted file mode 100644 (file)
index 0d79f3a..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# GPL HEADER START
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 only,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License version 2 for more details (a copy is included
-# in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU General Public License
-# version 2 along with this program; If not, see
-# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-#
-# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-# CA 95054 USA or visit www.sun.com if you need additional information or
-# have any questions.
-#
-# GPL HEADER END
-#
-
-#
-# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# This file is part of Lustre, http://www.lustre.org/
-# Lustre is a trademark of Sun Microsystems, Inc.
-#
-
-if MODULES
-if BUILD_RALND
-modulenet_DATA = kralnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-EXTRA_DIST = $(kralnd-objs:%.o=%.c) ralnd.h
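The ralnd.c removal that follows takes out the RapidArray connection setup. Its kranal_recv_connreq() reads a fixed-size connection request over a TCP socket, uses the magic field to decide whether the peer has the opposite byte order, and swabs the remaining fields if so. A minimal sketch of that magic-based endianness check is below; the struct, constant, and function names are hypothetical and only illustrate the pattern.

/* Illustrative only: detecting peer byte order from a protocol magic,
 * as the removed kranal_recv_connreq() did with RANAL_MSG_MAGIC. */
#include <linux/errno.h>
#include <linux/swab.h>
#include <linux/types.h>

#define DEMO_MSG_MAGIC	0x0be91b92		/* hypothetical wire magic */

struct demo_connreq {
	__u32 dcr_magic;
	__u32 dcr_version;
	/* ... remaining fields are read and swabbed the same way ... */
};

/* returns 0 on success, -EPROTO if the magic is unrecognised;
 * '*swab' is set when the peer's byte order differs from ours */
static int demo_check_magic(struct demo_connreq *req, int *swab)
{
	if (req->dcr_magic == DEMO_MSG_MAGIC)
		*swab = 0;
	else if (req->dcr_magic == __swab32(DEMO_MSG_MAGIC))
		*swab = 1;
	else
		return -EPROTO;

	if (*swab)
		__swab32s(&req->dcr_version);
	return 0;
}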
diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c
deleted file mode 100644 (file)
index a4b06ca..0000000
+++ /dev/null
@@ -1,1744 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ralnd/ralnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-#include "ralnd.h"
-
-static int        kranal_devids[RANAL_MAXDEVS] = {RAPK_MAIN_DEVICE_ID,
-                                                  RAPK_EXPANSION_DEVICE_ID};
-
-lnd_t the_kralnd = {
-        .lnd_type       = RALND,
-        .lnd_startup    = kranal_startup,
-        .lnd_shutdown   = kranal_shutdown,
-        .lnd_ctl        = kranal_ctl,
-        .lnd_send       = kranal_send,
-        .lnd_recv       = kranal_recv,
-        .lnd_eager_recv = kranal_eager_recv,
-        .lnd_accept     = kranal_accept,
-};
-
-kra_data_t              kranal_data;
-
-void
-kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn, lnet_nid_t dstnid)
-{
-        RAP_RETURN   rrc;
-
-        memset(connreq, 0, sizeof(*connreq));
-
-        connreq->racr_magic     = RANAL_MSG_MAGIC;
-        connreq->racr_version   = RANAL_MSG_VERSION;
-
-        if (conn == NULL)                       /* prepping a "stub" reply */
-                return;
-
-        connreq->racr_devid     = conn->rac_device->rad_id;
-        connreq->racr_srcnid    = kranal_data.kra_ni->ni_nid;
-        connreq->racr_dstnid    = dstnid;
-        connreq->racr_peerstamp = kranal_data.kra_peerstamp;
-        connreq->racr_connstamp = conn->rac_my_connstamp;
-        connreq->racr_timeout   = conn->rac_timeout;
-
-        rrc = RapkGetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
-        LASSERT(rrc == RAP_SUCCESS);
-}
-
-int
-kranal_recv_connreq(struct socket *sock, kra_connreq_t *connreq, int active)
-{
-        int         timeout = active ? *kranal_tunables.kra_timeout :
-                                        lnet_acceptor_timeout();
-        int         swab;
-        int         rc;
-
-        /* return 0 on success, -ve on error, +ve to tell the peer I'm "old" */
-
-        rc = libcfs_sock_read(sock, &connreq->racr_magic, 
-                              sizeof(connreq->racr_magic), timeout);
-        if (rc != 0) {
-                CERROR("Read(magic) failed(1): %d\n", rc);
-                return -EIO;
-        }
-
-        if (connreq->racr_magic != RANAL_MSG_MAGIC &&
-            connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) {
-                /* Unexpected magic! */
-                if (!active &&
-                    (connreq->racr_magic == LNET_PROTO_MAGIC ||
-                     connreq->racr_magic == __swab32(LNET_PROTO_MAGIC))) {
-                        /* future protocol version compatibility!
-                         * When LNET unifies protocols over all LNDs, the first
-                         * thing sent will be a version query.  +ve rc means I
-                         * reply with my current magic/version */
-                        return EPROTO;
-                }
-
-                CERROR("Unexpected magic %08x (%s)\n",
-                       connreq->racr_magic, active ? "active" : "passive");
-                return -EPROTO;
-        }
-
-        swab = (connreq->racr_magic == __swab32(RANAL_MSG_MAGIC));
-
-        rc = libcfs_sock_read(sock, &connreq->racr_version,
-                              sizeof(connreq->racr_version), timeout);
-        if (rc != 0) {
-                CERROR("Read(version) failed: %d\n", rc);
-                return -EIO;
-        }
-
-        if (swab)
-                __swab16s(&connreq->racr_version);
-        
-        if (connreq->racr_version != RANAL_MSG_VERSION) {
-                if (active) {
-                        CERROR("Unexpected version %d\n", connreq->racr_version);
-                        return -EPROTO;
-                }
-                /* If this is a future version of the ralnd protocol, and I'm
-                 * passive (accepted the connection), tell my peer I'm "old"
-                 * (+ve rc) */
-                return EPROTO;
-        }
-
-        rc = libcfs_sock_read(sock, &connreq->racr_devid,
-                              sizeof(connreq->racr_version) -
-                              offsetof(kra_connreq_t, racr_devid),
-                              timeout);
-        if (rc != 0) {
-                CERROR("Read(body) failed: %d\n", rc);
-                return -EIO;
-        }
-
-        if (swab) {
-                __swab32s(&connreq->racr_magic);
-                __swab16s(&connreq->racr_version);
-                __swab16s(&connreq->racr_devid);
-                __swab64s(&connreq->racr_srcnid);
-                __swab64s(&connreq->racr_dstnid);
-                __swab64s(&connreq->racr_peerstamp);
-                __swab64s(&connreq->racr_connstamp);
-                __swab32s(&connreq->racr_timeout);
-
-                __swab32s(&connreq->racr_riparams.HostId);
-                __swab32s(&connreq->racr_riparams.FmaDomainHndl);
-                __swab32s(&connreq->racr_riparams.PTag);
-                __swab32s(&connreq->racr_riparams.CompletionCookie);
-        }
-
-        if (connreq->racr_srcnid == LNET_NID_ANY ||
-            connreq->racr_dstnid == LNET_NID_ANY) {
-                CERROR("Received LNET_NID_ANY\n");
-                return -EPROTO;
-        }
-
-        if (connreq->racr_timeout < RANAL_MIN_TIMEOUT) {
-                CERROR("Received timeout %d < MIN %d\n",
-                       connreq->racr_timeout, RANAL_MIN_TIMEOUT);
-                return -EPROTO;
-        }
-
-        return 0;
-}
-
-int
-kranal_close_stale_conns_locked (kra_peer_t *peer, kra_conn_t *newconn)
-{
-        kra_conn_t         *conn;
-        cfs_list_t         *ctmp;
-        cfs_list_t         *cnxt;
-        int                 loopback;
-        int                 count = 0;
-
-        loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid;
-
-        cfs_list_for_each_safe (ctmp, cnxt, &peer->rap_conns) {
-                conn = cfs_list_entry(ctmp, kra_conn_t, rac_list);
-
-                if (conn == newconn)
-                        continue;
-
-                if (conn->rac_peerstamp != newconn->rac_peerstamp) {
-                        CDEBUG(D_NET, "Closing stale conn nid: %s "
-                               " peerstamp:"LPX64"("LPX64")\n", 
-                               libcfs_nid2str(peer->rap_nid),
-                               conn->rac_peerstamp, newconn->rac_peerstamp);
-                        LASSERT (conn->rac_peerstamp < newconn->rac_peerstamp);
-                        count++;
-                        kranal_close_conn_locked(conn, -ESTALE);
-                        continue;
-                }
-
-                if (conn->rac_device != newconn->rac_device)
-                        continue;
-
-                if (loopback &&
-                    newconn->rac_my_connstamp == conn->rac_peer_connstamp &&
-                    newconn->rac_peer_connstamp == conn->rac_my_connstamp)
-                        continue;
-
-                LASSERT (conn->rac_peer_connstamp < newconn->rac_peer_connstamp);
-
-                CDEBUG(D_NET, "Closing stale conn nid: %s"
-                       " connstamp:"LPX64"("LPX64")\n", 
-                       libcfs_nid2str(peer->rap_nid),
-                       conn->rac_peer_connstamp, newconn->rac_peer_connstamp);
-
-                count++;
-                kranal_close_conn_locked(conn, -ESTALE);
-        }
-
-        return count;
-}
-
-int
-kranal_conn_isdup_locked(kra_peer_t *peer, kra_conn_t *newconn)
-{
-        kra_conn_t       *conn;
-        cfs_list_t       *tmp;
-        int               loopback;
-
-        loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid;
-
-        cfs_list_for_each(tmp, &peer->rap_conns) {
-                conn = cfs_list_entry(tmp, kra_conn_t, rac_list);
-
-                /* 'newconn' is from an earlier version of 'peer'!!! */
-                if (newconn->rac_peerstamp < conn->rac_peerstamp)
-                        return 1;
-
-                /* 'conn' is from an earlier version of 'peer': it will be
-                 * removed when we cull stale conns later on... */
-                if (newconn->rac_peerstamp > conn->rac_peerstamp)
-                        continue;
-
-                /* Different devices are OK */
-                if (conn->rac_device != newconn->rac_device)
-                        continue;
-
-                /* It's me connecting to myself */
-                if (loopback &&
-                    newconn->rac_my_connstamp == conn->rac_peer_connstamp &&
-                    newconn->rac_peer_connstamp == conn->rac_my_connstamp)
-                        continue;
-
-                /* 'newconn' is an earlier connection from 'peer'!!! */
-                if (newconn->rac_peer_connstamp < conn->rac_peer_connstamp)
-                        return 2;
-
-                /* 'conn' is an earlier connection from 'peer': it will be
-                 * removed when we cull stale conns later on... */
-                if (newconn->rac_peer_connstamp > conn->rac_peer_connstamp)
-                        continue;
-
-                /* 'newconn' has the SAME connection stamp; 'peer' isn't
-                 * playing the game... */
-                return 3;
-        }
-
-        return 0;
-}
-
-void
-kranal_set_conn_uniqueness (kra_conn_t *conn)
-{
-        unsigned long  flags;
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        conn->rac_my_connstamp = kranal_data.kra_connstamp++;
-
-        do {    /* allocate a unique cqid */
-                conn->rac_cqid = kranal_data.kra_next_cqid++;
-        } while (kranal_cqid2conn_locked(conn->rac_cqid) != NULL);
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-}
-
-int
-kranal_create_conn(kra_conn_t **connp, kra_device_t *dev)
-{
-       kra_conn_t    *conn;
-       RAP_RETURN     rrc;
-
-       LASSERT (!in_interrupt());
-       LIBCFS_ALLOC(conn, sizeof(*conn));
-
-       if (conn == NULL)
-               return -ENOMEM;
-
-        memset(conn, 0, sizeof(*conn));
-       atomic_set(&conn->rac_refcount, 1);
-        CFS_INIT_LIST_HEAD(&conn->rac_list);
-        CFS_INIT_LIST_HEAD(&conn->rac_hashlist);
-        CFS_INIT_LIST_HEAD(&conn->rac_schedlist);
-        CFS_INIT_LIST_HEAD(&conn->rac_fmaq);
-        CFS_INIT_LIST_HEAD(&conn->rac_rdmaq);
-        CFS_INIT_LIST_HEAD(&conn->rac_replyq);
-       spin_lock_init(&conn->rac_lock);
-
-        kranal_set_conn_uniqueness(conn);
-
-        conn->rac_device = dev;
-        conn->rac_timeout = MAX(*kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT);
-        kranal_update_reaper_timeout(conn->rac_timeout);
-
-        rrc = RapkCreateRi(dev->rad_handle, conn->rac_cqid,
-                           &conn->rac_rihandle);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("RapkCreateRi failed: %d\n", rrc);
-                LIBCFS_FREE(conn, sizeof(*conn));
-                return -ENETDOWN;
-        }
-
-       atomic_inc(&kranal_data.kra_nconns);
-        *connp = conn;
-        return 0;
-}
-
-void
-kranal_destroy_conn(kra_conn_t *conn)
-{
-       RAP_RETURN         rrc;
-
-       LASSERT (!in_interrupt());
-       LASSERT (!conn->rac_scheduled);
-       LASSERT (cfs_list_empty(&conn->rac_list));
-       LASSERT (cfs_list_empty(&conn->rac_hashlist));
-       LASSERT (cfs_list_empty(&conn->rac_schedlist));
-       LASSERT (atomic_read(&conn->rac_refcount) == 0);
-       LASSERT (cfs_list_empty(&conn->rac_fmaq));
-       LASSERT (cfs_list_empty(&conn->rac_rdmaq));
-       LASSERT (cfs_list_empty(&conn->rac_replyq));
-
-       rrc = RapkDestroyRi(conn->rac_device->rad_handle,
-                           conn->rac_rihandle);
-       LASSERT (rrc == RAP_SUCCESS);
-
-       if (conn->rac_peer != NULL)
-               kranal_peer_decref(conn->rac_peer);
-
-       LIBCFS_FREE(conn, sizeof(*conn));
-       atomic_dec(&kranal_data.kra_nconns);
-}
-
-void
-kranal_terminate_conn_locked (kra_conn_t *conn)
-{
-       LASSERT (!in_interrupt());
-       LASSERT (conn->rac_state == RANAL_CONN_CLOSING);
-       LASSERT (!cfs_list_empty(&conn->rac_hashlist));
-       LASSERT (cfs_list_empty(&conn->rac_list));
-
-       /* Remove from conn hash table: no new callbacks */
-       cfs_list_del_init(&conn->rac_hashlist);
-       kranal_conn_decref(conn);
-
-       conn->rac_state = RANAL_CONN_CLOSED;
-
-       /* schedule to clear out all uncompleted comms in context of dev's
-        * scheduler */
-       kranal_schedule_conn(conn);
-}
-
-void
-kranal_close_conn_locked (kra_conn_t *conn, int error)
-{
-       kra_peer_t        *peer = conn->rac_peer;
-
-       CDEBUG_LIMIT(error == 0 ? D_NET : D_NETERROR,
-                    "closing conn to %s: error %d\n",
-                    libcfs_nid2str(peer->rap_nid), error);
-
-       LASSERT (!in_interrupt());
-       LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED);
-       LASSERT (!cfs_list_empty(&conn->rac_hashlist));
-       LASSERT (!cfs_list_empty(&conn->rac_list));
-
-       cfs_list_del_init(&conn->rac_list);
-
-       if (cfs_list_empty(&peer->rap_conns) &&
-           peer->rap_persistence == 0) {
-               /* Non-persistent peer with no more conns... */
-               kranal_unlink_peer_locked(peer);
-       }
-
-       /* Reset RX timeout to ensure we wait for an incoming CLOSE for the
-        * full timeout.  If we get a CLOSE we know the peer has stopped all
-        * RDMA.  Otherwise if we wait for the full timeout we can also be sure
-        * all RDMA has stopped. */
-       conn->rac_last_rx = jiffies;
-       smp_mb();
-
-       conn->rac_state = RANAL_CONN_CLOSING;
-       kranal_schedule_conn(conn);             /* schedule sending CLOSE */
-
-       kranal_conn_decref(conn);               /* lose peer's ref */
-}
-
-void
-kranal_close_conn (kra_conn_t *conn, int error)
-{
-        unsigned long    flags;
-
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        if (conn->rac_state == RANAL_CONN_ESTABLISHED)
-                kranal_close_conn_locked(conn, error);
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-}
-
-int
-kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq,
-                       __u32 peer_ip, int peer_port)
-{
-       kra_device_t  *dev = conn->rac_device;
-       unsigned long  flags;
-       RAP_RETURN     rrc;
-
-       /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive
-        * to do RapkCompleteSync() timekeeping (see kibnal_scheduler). */
-       conn->rac_last_tx = jiffies;
-       conn->rac_keepalive = 0;
-
-       rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
-       if (rrc != RAP_SUCCESS) {
-               CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n",
-                      HIPQUAD(peer_ip), peer_port, rrc);
-               return -ECONNABORTED;
-       }
-
-       /* Schedule conn on rad_new_conns */
-       kranal_conn_addref(conn);
-       spin_lock_irqsave(&dev->rad_lock, flags);
-       cfs_list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns);
-       wake_up(&dev->rad_waitq);
-       spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-       rrc = RapkWaitToConnect(conn->rac_rihandle);
-       if (rrc != RAP_SUCCESS) {
-               CERROR("Error waiting to connect to %u.%u.%u.%u/%d: %d\n",
-                      HIPQUAD(peer_ip), peer_port, rrc);
-               return -ECONNABORTED;
-       }
-
-       /* Scheduler doesn't touch conn apart from to deschedule and decref it
-        * after RapkCompleteSync() return success, so conn is all mine */
-
-       conn->rac_peerstamp = connreq->racr_peerstamp;
-       conn->rac_peer_connstamp = connreq->racr_connstamp;
-       conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout);
-       kranal_update_reaper_timeout(conn->rac_keepalive);
-       return 0;
-}
-
-int
-kranal_passive_conn_handshake (struct socket *sock, lnet_nid_t *src_nidp,
-                               lnet_nid_t *dst_nidp, kra_conn_t **connp)
-{
-        __u32                peer_ip;
-        unsigned int         peer_port;
-        kra_connreq_t        rx_connreq;
-        kra_connreq_t        tx_connreq;
-        kra_conn_t          *conn;
-        kra_device_t        *dev;
-        int                  rc;
-        int                  i;
-
-        rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-        if (rc != 0) {
-                CERROR("Can't get peer's IP: %d\n", rc);
-                return rc;
-        }
-
-        rc = kranal_recv_connreq(sock, &rx_connreq, 0);
-
-        if (rc < 0) {
-                CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n",
-                       HIPQUAD(peer_ip), peer_port, rc);
-                return rc;
-        }
-
-        if (rc > 0) {
-                /* Request from "new" peer: send reply with my MAGIC/VERSION to
-                 * tell her I'm old... */
-                kranal_pack_connreq(&tx_connreq, NULL, LNET_NID_ANY);
-
-                rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq),
-                                       lnet_acceptor_timeout());
-                if (rc != 0)
-                        CERROR("Can't tx stub connreq to %u.%u.%u.%u/%d: %d\n",
-                               HIPQUAD(peer_ip), peer_port, rc);
-
-                return -EPROTO;
-        }
-
-        for (i = 0;;i++) {
-                if (i == kranal_data.kra_ndevs) {
-                        CERROR("Can't match dev %d from %u.%u.%u.%u/%d\n",
-                               rx_connreq.racr_devid, HIPQUAD(peer_ip), peer_port);
-                        return -ENODEV;
-                }
-                dev = &kranal_data.kra_devices[i];
-                if (dev->rad_id == rx_connreq.racr_devid)
-                        break;
-        }
-
-        rc = kranal_create_conn(&conn, dev);
-        if (rc != 0)
-                return rc;
-
-        kranal_pack_connreq(&tx_connreq, conn, rx_connreq.racr_srcnid);
-
-        rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq),
-                               lnet_acceptor_timeout());
-        if (rc != 0) {
-                CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n",
-                       HIPQUAD(peer_ip), peer_port, rc);
-                kranal_conn_decref(conn);
-                return rc;
-        }
-
-        rc = kranal_set_conn_params(conn, &rx_connreq, peer_ip, peer_port);
-        if (rc != 0) {
-                kranal_conn_decref(conn);
-                return rc;
-        }
-
-        *connp = conn;
-        *src_nidp = rx_connreq.racr_srcnid;
-        *dst_nidp = rx_connreq.racr_dstnid;
-        return 0;
-}
-
-int
-kranal_active_conn_handshake(kra_peer_t *peer,
-                             lnet_nid_t *dst_nidp, kra_conn_t **connp)
-{
-        kra_connreq_t       connreq;
-        kra_conn_t         *conn;
-        kra_device_t       *dev;
-        struct socket      *sock;
-        int                 rc;
-        unsigned int        idx;
-
-        /* spread connections over all devices using both peer NIDs to ensure
-         * all nids use all devices */
-        idx = peer->rap_nid + kranal_data.kra_ni->ni_nid;
-        dev = &kranal_data.kra_devices[idx % kranal_data.kra_ndevs];
-
-        rc = kranal_create_conn(&conn, dev);
-        if (rc != 0)
-                return rc;
-
-        kranal_pack_connreq(&connreq, conn, peer->rap_nid);
-
-        if (the_lnet.ln_testprotocompat != 0) {
-                /* single-shot proto test */
-                LNET_LOCK();
-                if ((the_lnet.ln_testprotocompat & 1) != 0) {
-                        connreq.racr_version++;
-                        the_lnet.ln_testprotocompat &= ~1;
-                }
-                if ((the_lnet.ln_testprotocompat & 2) != 0) {
-                        connreq.racr_magic = LNET_PROTO_MAGIC;
-                        the_lnet.ln_testprotocompat &= ~2;
-                }
-                LNET_UNLOCK();
-        }
-
-        rc = lnet_connect(&sock, peer->rap_nid,
-                         0, peer->rap_ip, peer->rap_port);
-        if (rc != 0)
-                goto failed_0;
-
-        /* CAVEAT EMPTOR: the passive side receives with a SHORT rx timeout
-         * immediately after accepting a connection, so we connect and then
-         * send immediately. */
-
-        rc = libcfs_sock_write(sock, &connreq, sizeof(connreq),
-                               lnet_acceptor_timeout());
-        if (rc != 0) {
-                CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n",
-                       HIPQUAD(peer->rap_ip), peer->rap_port, rc);
-                goto failed_2;
-        }
-
-        rc = kranal_recv_connreq(sock, &connreq, 1);
-        if (rc != 0) {
-                CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n",
-                       HIPQUAD(peer->rap_ip), peer->rap_port, rc);
-                goto failed_2;
-        }
-
-        libcfs_sock_release(sock);
-        rc = -EPROTO;
-
-        if (connreq.racr_srcnid != peer->rap_nid) {
-                CERROR("Unexpected srcnid from %u.%u.%u.%u/%d: "
-                       "received %s expected %s\n",
-                       HIPQUAD(peer->rap_ip), peer->rap_port,
-                       libcfs_nid2str(connreq.racr_srcnid), 
-                       libcfs_nid2str(peer->rap_nid));
-                goto failed_1;
-        }
-
-        if (connreq.racr_devid != dev->rad_id) {
-                CERROR("Unexpected device id from %u.%u.%u.%u/%d: "
-                       "received %d expected %d\n",
-                       HIPQUAD(peer->rap_ip), peer->rap_port,
-                       connreq.racr_devid, dev->rad_id);
-                goto failed_1;
-        }
-
-        rc = kranal_set_conn_params(conn, &connreq,
-                                    peer->rap_ip, peer->rap_port);
-        if (rc != 0)
-                goto failed_1;
-
-        *connp = conn;
-        *dst_nidp = connreq.racr_dstnid;
-        return 0;
-
- failed_2:
-        libcfs_sock_release(sock);
- failed_1:
-        lnet_connect_console_error(rc, peer->rap_nid,
-                                  peer->rap_ip, peer->rap_port);
- failed_0:
-        kranal_conn_decref(conn);
-        return rc;
-}
-
-int
-kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
-{
-        kra_peer_t        *peer2;
-        kra_tx_t          *tx;
-        lnet_nid_t         peer_nid;
-        lnet_nid_t         dst_nid;
-        unsigned long      flags;
-        kra_conn_t        *conn;
-        int                rc;
-        int                nstale;
-        int                new_peer = 0;
-
-        if (sock == NULL) {
-                /* active: connd wants to connect to 'peer' */
-                LASSERT (peer != NULL);
-                LASSERT (peer->rap_connecting);
-
-                rc = kranal_active_conn_handshake(peer, &dst_nid, &conn);
-                if (rc != 0)
-                        return rc;
-
-               write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                if (!kranal_peer_active(peer)) {
-                        /* raced with peer getting unlinked */
-                       write_unlock_irqrestore(&kranal_data. \
-                                                    kra_global_lock,
-                                                    flags);
-                        kranal_conn_decref(conn);
-                        return -ESTALE;
-                }
-
-                peer_nid = peer->rap_nid;
-        } else {
-                /* passive: listener accepted 'sock' */
-                LASSERT (peer == NULL);
-
-                rc = kranal_passive_conn_handshake(sock, &peer_nid,
-                                                   &dst_nid, &conn);
-                if (rc != 0)
-                        return rc;
-
-                /* assume this is a new peer */
-                rc = kranal_create_peer(&peer, peer_nid);
-                if (rc != 0) {
-                        CERROR("Can't create conn for %s\n", 
-                               libcfs_nid2str(peer_nid));
-                        kranal_conn_decref(conn);
-                        return -ENOMEM;
-                }
-
-               write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                peer2 = kranal_find_peer_locked(peer_nid);
-                if (peer2 == NULL) {
-                        new_peer = 1;
-                } else {
-                        /* peer_nid already in the peer table */
-                        kranal_peer_decref(peer);
-                        peer = peer2;
-                }
-        }
-
-        LASSERT ((!new_peer) != (!kranal_peer_active(peer)));
-
-        /* Refuse connection if peer thinks we are a different NID.  We check
-         * this while holding the global lock, to synch with connection
-         * destruction on NID change. */
-        if (kranal_data.kra_ni->ni_nid != dst_nid) {
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-
-                CERROR("Stale/bad connection with %s: dst_nid %s, expected %s\n",
-                       libcfs_nid2str(peer_nid), libcfs_nid2str(dst_nid), 
-                       libcfs_nid2str(kranal_data.kra_ni->ni_nid));
-                rc = -ESTALE;
-                goto failed;
-        }
-
-        /* Refuse to duplicate an existing connection (both sides might try to
-         * connect at once).  NB we return success!  We _are_ connected so we
-         * _don't_ have any blocked txs to complete with failure. */
-        rc = kranal_conn_isdup_locked(peer, conn);
-        if (rc != 0) {
-                LASSERT (!cfs_list_empty(&peer->rap_conns));
-                LASSERT (cfs_list_empty(&peer->rap_tx_queue));
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-                CWARN("Not creating duplicate connection to %s: %d\n",
-                      libcfs_nid2str(peer_nid), rc);
-                rc = 0;
-                goto failed;
-        }
-
-        if (new_peer) {
-                /* peer table takes my ref on the new peer */
-                cfs_list_add_tail(&peer->rap_list,
-                                  kranal_nid2peerlist(peer_nid));
-        }
-
-        /* initialise timestamps before reaper looks at them */
-        conn->rac_last_tx = conn->rac_last_rx = jiffies;
-
-        kranal_peer_addref(peer);               /* +1 ref for conn */
-        conn->rac_peer = peer;
-        cfs_list_add_tail(&conn->rac_list, &peer->rap_conns);
-
-        kranal_conn_addref(conn);               /* +1 ref for conn table */
-        cfs_list_add_tail(&conn->rac_hashlist,
-                          kranal_cqid2connlist(conn->rac_cqid));
-
-        /* Schedule all packets blocking for a connection */
-        while (!cfs_list_empty(&peer->rap_tx_queue)) {
-                tx = cfs_list_entry(peer->rap_tx_queue.next,
-                                    kra_tx_t, tx_list);
-
-                cfs_list_del(&tx->tx_list);
-                kranal_post_fma(conn, tx);
-        }
-
-        nstale = kranal_close_stale_conns_locked(peer, conn);
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        /* CAVEAT EMPTOR: passive peer can disappear NOW */
-
-        if (nstale != 0)
-                CWARN("Closed %d stale conns to %s\n", nstale, 
-                      libcfs_nid2str(peer_nid));
-
-        CWARN("New connection to %s on devid[%d] = %d\n",
-               libcfs_nid2str(peer_nid), 
-               conn->rac_device->rad_idx, conn->rac_device->rad_id);
-
-        /* Ensure conn gets checked.  Transmits may have been queued and an
-         * FMA event may have happened before it got in the cq hash table */
-        kranal_schedule_conn(conn);
-        return 0;
-
- failed:
-        if (new_peer)
-                kranal_peer_decref(peer);
-        kranal_conn_decref(conn);
-        return rc;
-}
-
-void
-kranal_connect (kra_peer_t *peer)
-{
-        kra_tx_t          *tx;
-        unsigned long      flags;
-        cfs_list_t         zombies;
-        int                rc;
-
-        LASSERT (peer->rap_connecting);
-
-        CDEBUG(D_NET, "About to handshake %s\n", 
-               libcfs_nid2str(peer->rap_nid));
-
-        rc = kranal_conn_handshake(NULL, peer);
-
-        CDEBUG(D_NET, "Done handshake %s:%d \n", 
-               libcfs_nid2str(peer->rap_nid), rc);
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        LASSERT (peer->rap_connecting);
-        peer->rap_connecting = 0;
-
-        if (rc == 0) {
-                /* kranal_conn_handshake() queues blocked txs immediately on
-                 * success to avoid messages jumping the queue */
-                LASSERT (cfs_list_empty(&peer->rap_tx_queue));
-
-                peer->rap_reconnect_interval = 0; /* OK to reconnect at any time */
-
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-                return;
-        }
-
-        peer->rap_reconnect_interval *= 2;
-        peer->rap_reconnect_interval =
-                MAX(peer->rap_reconnect_interval,
-                    *kranal_tunables.kra_min_reconnect_interval);
-        peer->rap_reconnect_interval =
-                MIN(peer->rap_reconnect_interval,
-                    *kranal_tunables.kra_max_reconnect_interval);
-
-       peer->rap_reconnect_time = jiffies +
-               msecs_to_jiffies(peer->rap_reconnect_interval * MSEC_PER_SEC);
-
-        /* Grab all blocked packets while we have the global lock */
-        cfs_list_add(&zombies, &peer->rap_tx_queue);
-        cfs_list_del_init(&peer->rap_tx_queue);
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        if (cfs_list_empty(&zombies))
-                return;
-
-        CNETERR("Dropping packets for %s: connection failed\n",
-                libcfs_nid2str(peer->rap_nid));
-
-        do {
-                tx = cfs_list_entry(zombies.next, kra_tx_t, tx_list);
-
-                cfs_list_del(&tx->tx_list);
-                kranal_tx_done(tx, -EHOSTUNREACH);
-
-        } while (!cfs_list_empty(&zombies));
-}
-
-void
-kranal_free_acceptsock (kra_acceptsock_t *ras)
-{
-        libcfs_sock_release(ras->ras_sock);
-        LIBCFS_FREE(ras, sizeof(*ras));
-}
-
-int
-kranal_accept (lnet_ni_t *ni, struct socket *sock)
-{
-       kra_acceptsock_t  *ras;
-       int                rc;
-       __u32              peer_ip;
-       int                peer_port;
-       unsigned long      flags;
-
-       rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
-       LASSERT (rc == 0);                      /* we succeeded before */
-
-       LIBCFS_ALLOC(ras, sizeof(*ras));
-       if (ras == NULL) {
-               CERROR("ENOMEM allocating connection request from "
-                      "%u.%u.%u.%u\n", HIPQUAD(peer_ip));
-               return -ENOMEM;
-       }
-
-       ras->ras_sock = sock;
-
-       spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
-       cfs_list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq);
-       wake_up(&kranal_data.kra_connd_waitq);
-
-       spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-       return 0;
-}
-
-int
-kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid)
-{
-        kra_peer_t    *peer;
-        unsigned long  flags;
-
-        LASSERT (nid != LNET_NID_ANY);
-
-        LIBCFS_ALLOC(peer, sizeof(*peer));
-        if (peer == NULL)
-                return -ENOMEM;
-
-        memset(peer, 0, sizeof(*peer));         /* zero flags etc */
-
-        peer->rap_nid = nid;
-       atomic_set(&peer->rap_refcount, 1);     /* 1 ref for caller */
-
-        CFS_INIT_LIST_HEAD(&peer->rap_list);
-        CFS_INIT_LIST_HEAD(&peer->rap_connd_list);
-        CFS_INIT_LIST_HEAD(&peer->rap_conns);
-        CFS_INIT_LIST_HEAD(&peer->rap_tx_queue);
-
-        peer->rap_reconnect_interval = 0;       /* OK to connect at any time */
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        if (kranal_data.kra_nonewpeers) {
-                /* shutdown has started already */
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-
-                LIBCFS_FREE(peer, sizeof(*peer));
-                CERROR("Can't create peer: network shutdown\n");
-                return -ESHUTDOWN;
-        }
-
-       atomic_inc(&kranal_data.kra_npeers);
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        *peerp = peer;
-        return 0;
-}
-
-void
-kranal_destroy_peer (kra_peer_t *peer)
-{
-        CDEBUG(D_NET, "peer %s %p deleted\n", 
-               libcfs_nid2str(peer->rap_nid), peer);
-
-       LASSERT (atomic_read(&peer->rap_refcount) == 0);
-        LASSERT (peer->rap_persistence == 0);
-        LASSERT (!kranal_peer_active(peer));
-        LASSERT (!peer->rap_connecting);
-        LASSERT (cfs_list_empty(&peer->rap_conns));
-        LASSERT (cfs_list_empty(&peer->rap_tx_queue));
-        LASSERT (cfs_list_empty(&peer->rap_connd_list));
-
-        LIBCFS_FREE(peer, sizeof(*peer));
-
-        /* NB a peer's connections keep a reference on their peer until
-         * they are destroyed, so we can be assured that _all_ state to do
-         * with this peer has been cleaned up when its refcount drops to
-         * zero. */
-       atomic_dec(&kranal_data.kra_npeers);
-}
-
-kra_peer_t *
-kranal_find_peer_locked (lnet_nid_t nid)
-{
-        cfs_list_t       *peer_list = kranal_nid2peerlist(nid);
-        cfs_list_t       *tmp;
-        kra_peer_t       *peer;
-
-        cfs_list_for_each (tmp, peer_list) {
-
-                peer = cfs_list_entry(tmp, kra_peer_t, rap_list);
-
-                LASSERT (peer->rap_persistence > 0 ||     /* persistent peer */
-                         !cfs_list_empty(&peer->rap_conns));  /* active conn */
-
-                if (peer->rap_nid != nid)
-                        continue;
-
-                CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
-                       peer, libcfs_nid2str(nid), 
-                      atomic_read(&peer->rap_refcount));
-                return peer;
-        }
-        return NULL;
-}
-
-kra_peer_t *
-kranal_find_peer (lnet_nid_t nid)
-{
-        kra_peer_t     *peer;
-
-       read_lock(&kranal_data.kra_global_lock);
-        peer = kranal_find_peer_locked(nid);
-        if (peer != NULL)                       /* +1 ref for caller? */
-                kranal_peer_addref(peer);
-       read_unlock(&kranal_data.kra_global_lock);
-
-        return peer;
-}
-
-void
-kranal_unlink_peer_locked (kra_peer_t *peer)
-{
-        LASSERT (peer->rap_persistence == 0);
-        LASSERT (cfs_list_empty(&peer->rap_conns));
-
-        LASSERT (kranal_peer_active(peer));
-        cfs_list_del_init(&peer->rap_list);
-
-        /* lose peerlist's ref */
-        kranal_peer_decref(peer);
-}
-
-int
-kranal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp,
-                      int *persistencep)
-{
-        kra_peer_t        *peer;
-        cfs_list_t        *ptmp;
-        int                i;
-
-       read_lock(&kranal_data.kra_global_lock);
-
-        for (i = 0; i < kranal_data.kra_peer_hash_size; i++) {
-
-                cfs_list_for_each(ptmp, &kranal_data.kra_peers[i]) {
-
-                        peer = cfs_list_entry(ptmp, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !cfs_list_empty(&peer->rap_conns));
-
-                        if (index-- > 0)
-                                continue;
-
-                        *nidp = peer->rap_nid;
-                        *ipp = peer->rap_ip;
-                        *portp = peer->rap_port;
-                        *persistencep = peer->rap_persistence;
-
-                       read_unlock(&kranal_data.kra_global_lock);
-                        return 0;
-                }
-        }
-
-       read_unlock(&kranal_data.kra_global_lock);
-        return -ENOENT;
-}
-
-int
-kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port)
-{
-        unsigned long      flags;
-        kra_peer_t        *peer;
-        kra_peer_t        *peer2;
-        int                rc;
-
-        if (nid == LNET_NID_ANY)
-                return -EINVAL;
-
-        rc = kranal_create_peer(&peer, nid);
-        if (rc != 0)
-                return rc;
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        peer2 = kranal_find_peer_locked(nid);
-        if (peer2 != NULL) {
-                kranal_peer_decref(peer);
-                peer = peer2;
-        } else {
-                /* peer table takes existing ref on peer */
-                cfs_list_add_tail(&peer->rap_list,
-                              kranal_nid2peerlist(nid));
-        }
-
-        peer->rap_ip = ip;
-        peer->rap_port = port;
-        peer->rap_persistence++;
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-        return 0;
-}
-
-void
-kranal_del_peer_locked (kra_peer_t *peer)
-{
-        cfs_list_t       *ctmp;
-        cfs_list_t       *cnxt;
-        kra_conn_t       *conn;
-
-        peer->rap_persistence = 0;
-
-        if (cfs_list_empty(&peer->rap_conns)) {
-                kranal_unlink_peer_locked(peer);
-        } else {
-                cfs_list_for_each_safe(ctmp, cnxt, &peer->rap_conns) {
-                        conn = cfs_list_entry(ctmp, kra_conn_t, rac_list);
-
-                        kranal_close_conn_locked(conn, 0);
-                }
-                /* peer unlinks itself when last conn is closed */
-        }
-}
-
-int
-kranal_del_peer (lnet_nid_t nid)
-{
-        unsigned long      flags;
-        cfs_list_t        *ptmp;
-        cfs_list_t        *pnxt;
-        kra_peer_t        *peer;
-        int                lo;
-        int                hi;
-        int                i;
-        int                rc = -ENOENT;
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        if (nid != LNET_NID_ANY)
-                lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers;
-        else {
-                lo = 0;
-                hi = kranal_data.kra_peer_hash_size - 1;
-        }
-
-        for (i = lo; i <= hi; i++) {
-                cfs_list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) {
-                        peer = cfs_list_entry(ptmp, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !cfs_list_empty(&peer->rap_conns));
-
-                        if (!(nid == LNET_NID_ANY || peer->rap_nid == nid))
-                                continue;
-
-                        kranal_del_peer_locked(peer);
-                        rc = 0;         /* matched something */
-                }
-        }
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        return rc;
-}
-
-kra_conn_t *
-kranal_get_conn_by_idx (int index)
-{
-        kra_peer_t        *peer;
-        cfs_list_t        *ptmp;
-        kra_conn_t        *conn;
-        cfs_list_t        *ctmp;
-        int                i;
-
-       read_lock(&kranal_data.kra_global_lock);
-
-        for (i = 0; i < kranal_data.kra_peer_hash_size; i++) {
-                cfs_list_for_each (ptmp, &kranal_data.kra_peers[i]) {
-
-                        peer = cfs_list_entry(ptmp, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !cfs_list_empty(&peer->rap_conns));
-
-                        cfs_list_for_each (ctmp, &peer->rap_conns) {
-                                if (index-- > 0)
-                                        continue;
-
-                                conn = cfs_list_entry(ctmp, kra_conn_t,
-                                                      rac_list);
-                                CDEBUG(D_NET, "++conn[%p] -> %s (%d)\n", conn,
-                                       libcfs_nid2str(conn->rac_peer->rap_nid),
-                                      atomic_read(&conn->rac_refcount));
-                               atomic_inc(&conn->rac_refcount);
-                               read_unlock(&kranal_data.kra_global_lock);
-                                return conn;
-                        }
-                }
-        }
-
-       read_unlock(&kranal_data.kra_global_lock);
-        return NULL;
-}
-
-int
-kranal_close_peer_conns_locked (kra_peer_t *peer, int why)
-{
-        kra_conn_t         *conn;
-        cfs_list_t         *ctmp;
-        cfs_list_t         *cnxt;
-        int                 count = 0;
-
-        cfs_list_for_each_safe (ctmp, cnxt, &peer->rap_conns) {
-                conn = cfs_list_entry(ctmp, kra_conn_t, rac_list);
-
-                count++;
-                kranal_close_conn_locked(conn, why);
-        }
-
-        return count;
-}
-
-int
-kranal_close_matching_conns (lnet_nid_t nid)
-{
-        unsigned long       flags;
-        kra_peer_t         *peer;
-        cfs_list_t         *ptmp;
-        cfs_list_t         *pnxt;
-        int                 lo;
-        int                 hi;
-        int                 i;
-        int                 count = 0;
-
-       write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        if (nid != LNET_NID_ANY)
-                lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers;
-        else {
-                lo = 0;
-                hi = kranal_data.kra_peer_hash_size - 1;
-        }
-
-        for (i = lo; i <= hi; i++) {
-                cfs_list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) {
-
-                        peer = cfs_list_entry(ptmp, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !cfs_list_empty(&peer->rap_conns));
-
-                        if (!(nid == LNET_NID_ANY || nid == peer->rap_nid))
-                                continue;
-
-                        count += kranal_close_peer_conns_locked(peer, 0);
-                }
-        }
-
-       write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        /* wildcards always succeed */
-        if (nid == LNET_NID_ANY)
-                return 0;
-
-        return (count == 0) ? -ENOENT : 0;
-}
-
-int
-kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
-        struct libcfs_ioctl_data *data = arg;
-        int                       rc = -EINVAL;
-
-        LASSERT (ni == kranal_data.kra_ni);
-
-        switch(cmd) {
-        case IOC_LIBCFS_GET_PEER: {
-                lnet_nid_t   nid = 0;
-                __u32       ip = 0;
-                int         port = 0;
-                int         share_count = 0;
-
-                rc = kranal_get_peer_info(data->ioc_count,
-                                          &nid, &ip, &port, &share_count);
-                data->ioc_nid    = nid;
-                data->ioc_count  = share_count;
-                data->ioc_u32[0] = ip;
-                data->ioc_u32[1] = port;
-                break;
-        }
-        case IOC_LIBCFS_ADD_PEER: {
-                rc = kranal_add_persistent_peer(data->ioc_nid,
-                                                data->ioc_u32[0], /* IP */
-                                                data->ioc_u32[1]); /* port */
-                break;
-        }
-        case IOC_LIBCFS_DEL_PEER: {
-                rc = kranal_del_peer(data->ioc_nid);
-                break;
-        }
-        case IOC_LIBCFS_GET_CONN: {
-                kra_conn_t *conn = kranal_get_conn_by_idx(data->ioc_count);
-
-                if (conn == NULL)
-                        rc = -ENOENT;
-                else {
-                        rc = 0;
-                        data->ioc_nid    = conn->rac_peer->rap_nid;
-                        data->ioc_u32[0] = conn->rac_device->rad_id;
-                        kranal_conn_decref(conn);
-                }
-                break;
-        }
-        case IOC_LIBCFS_CLOSE_CONNECTION: {
-                rc = kranal_close_matching_conns(data->ioc_nid);
-                break;
-        }
-        case IOC_LIBCFS_REGISTER_MYNID: {
-                /* Ignore if this is a noop */
-                if (data->ioc_nid == ni->ni_nid) {
-                        rc = 0;
-                } else {
-                        CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
-                               libcfs_nid2str(data->ioc_nid),
-                               libcfs_nid2str(ni->ni_nid));
-                        rc = -EINVAL;
-                }
-                break;
-        }
-        }
-
-        return rc;
-}
-
-void
-kranal_free_txdescs(cfs_list_t *freelist)
-{
-        kra_tx_t    *tx;
-
-        while (!cfs_list_empty(freelist)) {
-                tx = cfs_list_entry(freelist->next, kra_tx_t, tx_list);
-
-                cfs_list_del(&tx->tx_list);
-                LIBCFS_FREE(tx->tx_phys, LNET_MAX_IOV * sizeof(*tx->tx_phys));
-                LIBCFS_FREE(tx, sizeof(*tx));
-        }
-}
-
-int
-kranal_alloc_txdescs(cfs_list_t *freelist, int n)
-{
-        int            i;
-        kra_tx_t      *tx;
-
-        LASSERT (freelist == &kranal_data.kra_idle_txs);
-        LASSERT (cfs_list_empty(freelist));
-
-        for (i = 0; i < n; i++) {
-
-                LIBCFS_ALLOC(tx, sizeof(*tx));
-                if (tx == NULL) {
-                        CERROR("Can't allocate tx[%d]\n", i);
-                        kranal_free_txdescs(freelist);
-                        return -ENOMEM;
-                }
-
-                LIBCFS_ALLOC(tx->tx_phys,
-                             LNET_MAX_IOV * sizeof(*tx->tx_phys));
-                if (tx->tx_phys == NULL) {
-                        CERROR("Can't allocate tx[%d]->tx_phys\n", i);
-
-                        LIBCFS_FREE(tx, sizeof(*tx));
-                        kranal_free_txdescs(freelist);
-                        return -ENOMEM;
-                }
-
-                tx->tx_buftype = RANAL_BUF_NONE;
-                tx->tx_msg.ram_type = RANAL_MSG_NONE;
-
-                cfs_list_add(&tx->tx_list, freelist);
-        }
-
-        return 0;
-}
-
-int
-kranal_device_init(int id, kra_device_t *dev)
-{
-        int               total_ntx = *kranal_tunables.kra_ntx;
-        RAP_RETURN        rrc;
-
-        dev->rad_id = id;
-        rrc = RapkGetDeviceByIndex(id, kranal_device_callback,
-                                   &dev->rad_handle);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't get Rapidarray Device %d: %d\n", id, rrc);
-                goto failed_0;
-        }
-
-        rrc = RapkReserveRdma(dev->rad_handle, total_ntx);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't reserve %d RDMA descriptors"
-                       " for device %d: %d\n", total_ntx, id, rrc);
-                goto failed_1;
-        }
-
-        rrc = RapkCreateCQ(dev->rad_handle, total_ntx, RAP_CQTYPE_SEND,
-                           &dev->rad_rdma_cqh);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't create rdma cq size %d for device %d: %d\n",
-                       total_ntx, id, rrc);
-                goto failed_1;
-        }
-
-        rrc = RapkCreateCQ(dev->rad_handle, 
-                           *kranal_tunables.kra_fma_cq_size, 
-                           RAP_CQTYPE_RECV, &dev->rad_fma_cqh);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't create fma cq size %d for device %d: %d\n", 
-                       *kranal_tunables.kra_fma_cq_size, id, rrc);
-                goto failed_2;
-        }
-
-        return 0;
-
- failed_2:
-        RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
- failed_1:
-        RapkReleaseDevice(dev->rad_handle);
- failed_0:
-        return -ENODEV;
-}
-
-void
-kranal_device_fini(kra_device_t *dev)
-{
-        LASSERT (cfs_list_empty(&dev->rad_ready_conns));
-        LASSERT (cfs_list_empty(&dev->rad_new_conns));
-        LASSERT (dev->rad_nphysmap == 0);
-        LASSERT (dev->rad_nppphysmap == 0);
-        LASSERT (dev->rad_nvirtmap == 0);
-        LASSERT (dev->rad_nobvirtmap == 0);
-
-        LASSERT(dev->rad_scheduler == NULL);
-        RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cqh);
-        RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
-        RapkReleaseDevice(dev->rad_handle);
-}
-
-void
-kranal_shutdown (lnet_ni_t *ni)
-{
-        int           i;
-        unsigned long flags;
-
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
-
-        LASSERT (ni == kranal_data.kra_ni);
-        LASSERT (ni->ni_data == &kranal_data);
-
-        switch (kranal_data.kra_init) {
-        default:
-                CERROR("Unexpected state %d\n", kranal_data.kra_init);
-                LBUG();
-
-        case RANAL_INIT_ALL:
-                /* Prevent new peers from being created */
-               write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-                kranal_data.kra_nonewpeers = 1;
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-
-                /* Remove all existing peers from the peer table */
-                kranal_del_peer(LNET_NID_ANY);
-
-                /* Wait for pending conn reqs to be handled */
-                i = 2;
-               spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-                while (!cfs_list_empty(&kranal_data.kra_connd_acceptq)) {
-                       spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
-                                                   flags);
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
-                               "waiting for conn reqs to clean up\n");
-                        cfs_pause(cfs_time_seconds(1));
-
-                       spin_lock_irqsave(&kranal_data.kra_connd_lock,
-                                              flags);
-                }
-               spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-                /* Wait for all peers to be freed */
-                i = 2;
-               while (atomic_read(&kranal_data.kra_npeers) != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
-                               "waiting for %d peers to close down\n",
-                              atomic_read(&kranal_data.kra_npeers));
-                        cfs_pause(cfs_time_seconds(1));
-                }
-                /* fall through */
-
-        case RANAL_INIT_DATA:
-                break;
-        }
-
-        /* Peer state all cleaned up BEFORE setting shutdown, so threads don't
-         * have to worry about shutdown races.  NB connections may be created
-         * while there are still active connds, but these will be temporary
-         * since peer creation always fails after the listener has started to
-         * shut down. */
-       LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
-        
-        /* Flag threads to terminate */
-        kranal_data.kra_shutdown = 1;
-
-       for (i = 0; i < kranal_data.kra_ndevs; i++) {
-               kra_device_t *dev = &kranal_data.kra_devices[i];
-
-               spin_lock_irqsave(&dev->rad_lock, flags);
-               wake_up(&dev->rad_waitq);
-               spin_unlock_irqrestore(&dev->rad_lock, flags);
-       }
-
-       spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-       wake_up_all(&kranal_data.kra_reaper_waitq);
-       spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
-       LASSERT (cfs_list_empty(&kranal_data.kra_connd_peers));
-       spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-       wake_up_all(&kranal_data.kra_connd_waitq);
-       spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-        /* Wait for threads to exit */
-        i = 2;
-       while (atomic_read(&kranal_data.kra_nthreads) != 0) {
-                i++;
-                CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                       "Waiting for %d threads to terminate\n",
-                      atomic_read(&kranal_data.kra_nthreads));
-                cfs_pause(cfs_time_seconds(1));
-        }
-
-       LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
-        if (kranal_data.kra_peers != NULL) {
-                for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
-                        LASSERT (cfs_list_empty(&kranal_data.kra_peers[i]));
-
-                LIBCFS_FREE(kranal_data.kra_peers,
-                            sizeof (cfs_list_t) *
-                            kranal_data.kra_peer_hash_size);
-        }
-
-       LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
-        if (kranal_data.kra_conns != NULL) {
-                for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
-                        LASSERT (cfs_list_empty(&kranal_data.kra_conns[i]));
-
-                LIBCFS_FREE(kranal_data.kra_conns,
-                            sizeof (cfs_list_t) *
-                            kranal_data.kra_conn_hash_size);
-        }
-
-        for (i = 0; i < kranal_data.kra_ndevs; i++)
-                kranal_device_fini(&kranal_data.kra_devices[i]);
-
-        kranal_free_txdescs(&kranal_data.kra_idle_txs);
-
-        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
-
-       kranal_data.kra_init = RANAL_INIT_NOTHING;
-       module_put(THIS_MODULE);
-}
-
-int
-kranal_startup (lnet_ni_t *ni)
-{
-        struct timeval    tv;
-       int               pkmem = atomic_read(&libcfs_kmemory);
-        int               rc;
-        int               i;
-        kra_device_t     *dev;
-       char              name[16];
-
-        LASSERT (ni->ni_lnd == &the_kralnd);
-
-        /* Only 1 instance supported */
-        if (kranal_data.kra_init != RANAL_INIT_NOTHING) {
-                CERROR ("Only 1 instance supported\n");
-                return -EPERM;
-        }
-
-        if (lnet_set_ip_niaddr(ni) != 0) {
-                CERROR ("Can't determine my NID\n");
-                return -EPERM;
-        }
-
-        if (*kranal_tunables.kra_credits > *kranal_tunables.kra_ntx) {
-                CERROR ("Can't set credits(%d) > ntx(%d)\n",
-                        *kranal_tunables.kra_credits,
-                        *kranal_tunables.kra_ntx);
-                return -EINVAL;
-        }
-        
-        memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */
-
-        ni->ni_maxtxcredits = *kranal_tunables.kra_credits;
-        ni->ni_peertxcredits = *kranal_tunables.kra_peercredits;
-
-        ni->ni_data = &kranal_data;
-        kranal_data.kra_ni = ni;
-
-       /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and
-        * a unique (for all time) connstamp so we can uniquely identify
-        * the sender.  The connstamp is an incrementing counter
-        * initialised with seconds + microseconds at startup time.  So we
-        * rely on NOT creating connections more frequently on average than
-        * 1MHz to ensure we don't use old connstamps when we reboot. */
-       do_gettimeofday(&tv);
-       kranal_data.kra_connstamp =
-       kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
-       rwlock_init(&kranal_data.kra_global_lock);
-
-       for (i = 0; i < RANAL_MAXDEVS; i++ ) {
-               kra_device_t  *dev = &kranal_data.kra_devices[i];
-
-               dev->rad_idx = i;
-               CFS_INIT_LIST_HEAD(&dev->rad_ready_conns);
-               CFS_INIT_LIST_HEAD(&dev->rad_new_conns);
-               init_waitqueue_head(&dev->rad_waitq);
-               spin_lock_init(&dev->rad_lock);
-       }
-
-       kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT;
-       init_waitqueue_head(&kranal_data.kra_reaper_waitq);
-       spin_lock_init(&kranal_data.kra_reaper_lock);
-
-       CFS_INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq);
-       CFS_INIT_LIST_HEAD(&kranal_data.kra_connd_peers);
-       init_waitqueue_head(&kranal_data.kra_connd_waitq);
-       spin_lock_init(&kranal_data.kra_connd_lock);
-
-        CFS_INIT_LIST_HEAD(&kranal_data.kra_idle_txs);
-       spin_lock_init(&kranal_data.kra_tx_lock);
-
-       /* OK to call kranal_api_shutdown() to cleanup now */
-       kranal_data.kra_init = RANAL_INIT_DATA;
-       try_module_get(THIS_MODULE);
-
-        kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE;
-        LIBCFS_ALLOC(kranal_data.kra_peers,
-                     sizeof(cfs_list_t) *
-                            kranal_data.kra_peer_hash_size);
-        if (kranal_data.kra_peers == NULL)
-                goto failed;
-
-        for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
-                CFS_INIT_LIST_HEAD(&kranal_data.kra_peers[i]);
-
-        kranal_data.kra_conn_hash_size = RANAL_PEER_HASH_SIZE;
-        LIBCFS_ALLOC(kranal_data.kra_conns,
-                     sizeof(cfs_list_t) *
-                            kranal_data.kra_conn_hash_size);
-        if (kranal_data.kra_conns == NULL)
-                goto failed;
-
-        for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
-                CFS_INIT_LIST_HEAD(&kranal_data.kra_conns[i]);
-
-        rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs, 
-                                  *kranal_tunables.kra_ntx);
-        if (rc != 0)
-                goto failed;
-
-       rc = kranal_thread_start(kranal_reaper, NULL, "kranal_reaper");
-        if (rc != 0) {
-                CERROR("Can't spawn ranal reaper: %d\n", rc);
-                goto failed;
-        }
-
-        for (i = 0; i < *kranal_tunables.kra_n_connd; i++) {
-               snprintf(name, sizeof(name), "kranal_connd_%02ld", i);
-               rc = kranal_thread_start(kranal_connd,
-                                        (void *)(unsigned long)i, name);
-                if (rc != 0) {
-                        CERROR("Can't spawn ranal connd[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        LASSERT (kranal_data.kra_ndevs == 0);
-
-        /* Use all available RapidArray devices */
-        for (i = 0; i < RANAL_MAXDEVS; i++) {
-                dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
-
-                rc = kranal_device_init(kranal_devids[i], dev);
-                if (rc == 0)
-                        kranal_data.kra_ndevs++;
-        }
-
-        if (kranal_data.kra_ndevs == 0) {
-                CERROR("Can't initialise any RapidArray devices\n");
-                goto failed;
-        }
-        
-        for (i = 0; i < kranal_data.kra_ndevs; i++) {
-                dev = &kranal_data.kra_devices[i];
-               snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx);
-               rc = kranal_thread_start(kranal_scheduler, dev, name);
-                if (rc != 0) {
-                        CERROR("Can't spawn ranal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        /* flag everything initialised */
-        kranal_data.kra_init = RANAL_INIT_ALL;
-        /*****************************************************/
-
-        CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem);
-        return 0;
-
- failed:
-        kranal_shutdown(ni);
-        return -ENETDOWN;
-}
-
-void __exit
-kranal_module_fini (void)
-{
-        lnet_unregister_lnd(&the_kralnd);
-        kranal_tunables_fini();
-}
-
-int __init
-kranal_module_init (void)
-{
-        int    rc;
-
-        rc = kranal_tunables_init();
-        if (rc != 0)
-                return rc;
-
-        lnet_register_lnd(&the_kralnd);
-
-        return 0;
-}
-
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Kernel RapidArray LND v0.01");
-MODULE_LICENSE("GPL");
-
-module_init(kranal_module_init);
-module_exit(kranal_module_fini);
diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h
deleted file mode 100644
index fb7aa20..0000000
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ralnd/ralnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-
-#include <rapl.h>
-
-/* tunables determined at compile time */
-#define RANAL_RESCHED             100           /* # scheduler loops before reschedule */
-
-#define RANAL_PEER_HASH_SIZE      101           /* # peer lists */
-#define RANAL_CONN_HASH_SIZE      101           /* # conn lists */
-
-#define RANAL_MIN_TIMEOUT         5             /* minimum timeout interval (seconds) */
-#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2)  /* timeout -> keepalive interval */
-
-/* fixed constants */
-#define RANAL_MAXDEVS             2             /* max # devices RapidArray supports */
-#define RANAL_FMA_MAX_PREFIX      232           /* max bytes in FMA "Prefix" we can use */
-#define RANAL_FMA_MAX_DATA        ((7<<10)-256) /* Max FMA MSG is 7K including prefix */
-
-
-typedef struct
-{
-        int              *kra_n_connd;          /* # connection daemons */
-        int              *kra_min_reconnect_interval; /* first failed connection retry... */
-        int              *kra_max_reconnect_interval; /* ...exponentially increasing to this */
-        int              *kra_ntx;              /* # tx descs */
-        int              *kra_credits;          /* # concurrent sends */
-        int              *kra_peercredits;      /* # concurrent sends to 1 peer */
-        int              *kra_fma_cq_size;      /* # entries in receive CQ */
-        int              *kra_timeout;          /* comms timeout (seconds) */
-        int              *kra_max_immediate;    /* immediate payload breakpoint */
-
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-       struct ctl_table_header *kra_sysctl;  /* sysctl interface */
-#endif
-} kra_tunables_t;
-
-typedef struct
-{
-       RAP_PVOID              rad_handle;    /* device handle */
-       RAP_PVOID              rad_fma_cqh;   /* FMA completion queue handle */
-       RAP_PVOID              rad_rdma_cqh;  /* rdma completion queue handle */
-       int                    rad_id;        /* device id */
-       int                    rad_idx;       /* index in kra_devices */
-       int                    rad_ready;     /* set by device callback */
-       cfs_list_t             rad_ready_conns;/* connections ready to tx/rx */
-       cfs_list_t             rad_new_conns; /* new connections to complete */
-       wait_queue_head_t      rad_waitq;     /* scheduler waits here */
-       spinlock_t             rad_lock;        /* serialise */
-       void                   *rad_scheduler; /* scheduling thread */
-       unsigned int           rad_nphysmap;  /* # phys mappings */
-       unsigned int           rad_nppphysmap;/* # phys pages mapped */
-       unsigned int           rad_nvirtmap;  /* # virt mappings */
-       unsigned long          rad_nobvirtmap;/* # virt bytes mapped */
-} kra_device_t;
-
-typedef struct
-{
-       int               kra_init;            /* initialisation state */
-       int               kra_shutdown;        /* shut down? */
-       atomic_t      kra_nthreads;        /* # live threads */
-       lnet_ni_t        *kra_ni;              /* _the_ nal instance */
-
-       kra_device_t      kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq */
-       int               kra_ndevs;           /* # devices */
-
-       rwlock_t          kra_global_lock;      /* stabilize peer/conn ops */
-
-       cfs_list_t       *kra_peers;           /* hash table of all my known peers */
-       int               kra_peer_hash_size;  /* size of kra_peers */
-       atomic_t      kra_npeers;          /* # peers extant */
-       int               kra_nonewpeers;      /* prevent new peers */
-
-       cfs_list_t       *kra_conns;           /* conns hashed by cqid */
-       int               kra_conn_hash_size;  /* size of kra_conns */
-       __u64             kra_peerstamp;       /* when I started up */
-       __u64             kra_connstamp;       /* conn stamp generator */
-       int               kra_next_cqid;       /* cqid generator */
-       atomic_t      kra_nconns;          /* # connections extant */
-
-       long              kra_new_min_timeout; /* minimum timeout on any new conn */
-       wait_queue_head_t       kra_reaper_waitq;    /* reaper sleeps here */
-       spinlock_t        kra_reaper_lock;     /* serialise */
-
-       cfs_list_t        kra_connd_peers;     /* peers waiting for a connection */
-       cfs_list_t        kra_connd_acceptq;   /* accepted sockets to handshake */
-       wait_queue_head_t       kra_connd_waitq;     /* connection daemons sleep here */
-       spinlock_t        kra_connd_lock;       /* serialise */
-
-       cfs_list_t        kra_idle_txs;        /* idle tx descriptors */
-       __u64             kra_next_tx_cookie;  /* RDMA completion cookie */
-       spinlock_t        kra_tx_lock;          /* serialise */
-} kra_data_t;
-
-#define RANAL_INIT_NOTHING         0
-#define RANAL_INIT_DATA            1
-#define RANAL_INIT_ALL             2
-
-typedef struct kra_acceptsock             /* accepted socket queued for connd */
-{
-        cfs_list_t           ras_list;          /* queue for attention */
-        struct socket       *ras_sock;          /* the accepted socket */
-} kra_acceptsock_t;
-
-/************************************************************************
- * Wire message structs.  These are sent in sender's byte order
- * (i.e. receiver checks magic and flips if required).
- */
-
-typedef struct kra_connreq                      /* connection request/response */
-{                                               /* (sent via socket) */
-        __u32             racr_magic;           /* I'm an ranal connreq */
-        __u16             racr_version;         /* this is my version number */
-        __u16             racr_devid;           /* sender's device ID */
-        __u64             racr_srcnid;          /* sender's NID */
-        __u64             racr_dstnid;          /* who sender expects to listen */
-        __u64             racr_peerstamp;       /* sender's instance stamp */
-        __u64             racr_connstamp;       /* sender's connection stamp */
-        __u32             racr_timeout;         /* sender's timeout */
-        RAP_RI_PARAMETERS racr_riparams;        /* sender's endpoint info */
-} kra_connreq_t;
-
-typedef struct
-{
-        RAP_MEM_KEY       rard_key;
-        RAP_PVOID64       rard_addr;
-        RAP_UINT32        rard_nob;
-} kra_rdma_desc_t;
-
-typedef struct
-{
-        lnet_hdr_t        raim_hdr;             /* portals header */
-        /* Portals payload is in FMA "Message Data" */
-} kra_immediate_msg_t;
-
-typedef struct
-{
-        lnet_hdr_t        raprm_hdr;            /* portals header */
-        __u64             raprm_cookie;         /* opaque completion cookie */
-} kra_putreq_msg_t;
-
-typedef struct
-{
-        __u64             rapam_src_cookie;     /* reflected completion cookie */
-        __u64             rapam_dst_cookie;     /* opaque completion cookie */
-        kra_rdma_desc_t   rapam_desc;           /* sender's sink buffer */
-} kra_putack_msg_t;
-
-typedef struct
-{
-        lnet_hdr_t        ragm_hdr;             /* portals header */
-        __u64             ragm_cookie;          /* opaque completion cookie */
-        kra_rdma_desc_t   ragm_desc;            /* sender's sink buffer */
-} kra_get_msg_t;
-
-typedef struct
-{
-        __u64             racm_cookie;          /* reflected completion cookie */
-} kra_completion_msg_t;
-
-typedef struct                                  /* NB must fit in FMA "Prefix" */
-{
-        __u32             ram_magic;            /* I'm an ranal message */
-        __u16             ram_version;          /* this is my version number */
-        __u16             ram_type;             /* msg type */
-        __u64             ram_srcnid;           /* sender's NID */
-        __u64             ram_connstamp;        /* sender's connection stamp */
-        union {
-                kra_immediate_msg_t   immediate;
-                kra_putreq_msg_t      putreq;
-                kra_putack_msg_t      putack;
-                kra_get_msg_t         get;
-                kra_completion_msg_t  completion;
-        }                    ram_u;
-        __u32             ram_seq;              /* incrementing sequence number */
-} kra_msg_t;
-
-#define RANAL_MSG_MAGIC     LNET_PROTO_RA_MAGIC /* unique magic */
-#define RANAL_MSG_VERSION              1        /* current protocol version */
-
-#define RANAL_MSG_FENCE             0x80        /* fence RDMA */
-
-#define RANAL_MSG_NONE              0x00        /* illegal message */
-#define RANAL_MSG_NOOP              0x01        /* empty ram_u (keepalive) */
-#define RANAL_MSG_IMMEDIATE         0x02        /* ram_u.immediate */
-#define RANAL_MSG_PUT_REQ           0x03        /* ram_u.putreq (src->sink) */
-#define RANAL_MSG_PUT_NAK           0x04        /* ram_u.completion (no PUT match: sink->src) */
-#define RANAL_MSG_PUT_ACK           0x05        /* ram_u.putack (PUT matched: sink->src) */
-#define RANAL_MSG_PUT_DONE          0x86        /* ram_u.completion (src->sink) */
-#define RANAL_MSG_GET_REQ           0x07        /* ram_u.get (sink->src) */
-#define RANAL_MSG_GET_NAK           0x08        /* ram_u.completion (no GET match: src->sink) */
-#define RANAL_MSG_GET_DONE          0x89        /* ram_u.completion (src->sink) */
-#define RANAL_MSG_CLOSE             0x8a        /* empty ram_u */
-
-/***********************************************************************/
-
-typedef struct kra_tx                           /* message descriptor */
-{
-        cfs_list_t            tx_list;      /* queue on idle_txs/rac_sendq/rac_waitq */
-        struct kra_conn      *tx_conn;      /* owning conn */
-        lnet_msg_t           *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
-        unsigned long         tx_qtime;     /* when tx started to wait for something (jiffies) */
-        int                   tx_nob;       /* # bytes of payload */
-        int                   tx_buftype;   /* payload buffer type */
-        void                 *tx_buffer;    /* source/sink buffer */
-        int                   tx_phys_offset; /* first page offset (if phys) */
-        int                   tx_phys_npages; /* # physical pages */
-        RAP_PHYS_REGION      *tx_phys;      /* page descriptors */
-        RAP_MEM_KEY           tx_map_key;   /* mapping key */
-        RAP_RDMA_DESCRIPTOR   tx_rdma_desc; /* rdma descriptor */
-        __u64                 tx_cookie;    /* identify this tx to peer */
-        kra_msg_t             tx_msg;       /* FMA message buffer */
-} kra_tx_t;
-
-#define RANAL_BUF_NONE           0              /* buffer type not set */
-#define RANAL_BUF_IMMEDIATE      1              /* immediate data */
-#define RANAL_BUF_PHYS_UNMAPPED  2              /* physical: not mapped yet */
-#define RANAL_BUF_PHYS_MAPPED    3              /* physical: mapped already */
-#define RANAL_BUF_VIRT_UNMAPPED  4              /* virtual: not mapped yet */
-#define RANAL_BUF_VIRT_MAPPED    5              /* virtual: mapped already */
-
-typedef struct kra_conn
-{
-        struct kra_peer    *rac_peer;           /* owning peer */
-        cfs_list_t          rac_list;          /* stash on peer's conn list */
-        cfs_list_t          rac_hashlist;      /* stash in connection hash table */
-        cfs_list_t          rac_schedlist;     /* schedule (on rad_???_conns) for attention */
-        cfs_list_t          rac_fmaq;          /* txs queued for FMA */
-        cfs_list_t          rac_rdmaq;         /* txs awaiting RDMA completion */
-        cfs_list_t          rac_replyq;        /* txs awaiting replies */
-        __u64               rac_peerstamp;     /* peer's unique stamp */
-        __u64               rac_peer_connstamp;/* peer's unique connection stamp */
-        __u64               rac_my_connstamp;  /* my unique connection stamp */
-        unsigned long       rac_last_tx;       /* when I last sent an FMA message (jiffies) */
-        unsigned long       rac_last_rx;       /* when I last received an FMA messages (jiffies) */
-        long                rac_keepalive;     /* keepalive interval (seconds) */
-        long                rac_timeout;       /* infer peer death if no rx for this many seconds */
-        __u32               rac_cqid;          /* my completion callback id (non-unique) */
-        __u32               rac_tx_seq;        /* tx msg sequence number */
-        __u32               rac_rx_seq;        /* rx msg sequence number */
-       atomic_t        rac_refcount;      /* # users */
-        unsigned int        rac_close_sent;    /* I've sent CLOSE */
-        unsigned int        rac_close_recvd;   /* I've received CLOSE */
-        unsigned int        rac_state;         /* connection state */
-        unsigned int        rac_scheduled;     /* being attented to */
-       spinlock_t          rac_lock;           /* serialise */
-        kra_device_t       *rac_device;        /* which device */
-        RAP_PVOID           rac_rihandle;      /* RA endpoint */
-        kra_msg_t          *rac_rxmsg;         /* incoming message (FMA prefix) */
-        kra_msg_t           rac_msg;           /* keepalive/CLOSE message buffer */
-} kra_conn_t;
-
-#define RANAL_CONN_ESTABLISHED     0
-#define RANAL_CONN_CLOSING         1
-#define RANAL_CONN_CLOSED          2
-
-typedef struct kra_peer {
-       cfs_list_t          rap_list;         /* stash on global peer list */
-       cfs_list_t          rap_connd_list;   /* schedule on kra_connd_peers */
-       cfs_list_t          rap_conns;        /* all active connections */
-       cfs_list_t          rap_tx_queue;     /* msgs waiting for a conn */
-       lnet_nid_t          rap_nid;          /* who's on the other end(s) */
-       __u32               rap_ip;           /* IP address of peer */
-       int                 rap_port;         /* port on which peer listens */
-       atomic_t        rap_refcount;     /* # users */
-       int                 rap_persistence;  /* "known" peer refs */
-       int                 rap_connecting;   /* connection forming */
-       unsigned long       rap_reconnect_time; /* get_seconds() when reconnect OK */
-       unsigned long       rap_reconnect_interval; /* exponential backoff */
-} kra_peer_t;
-
-extern kra_data_t      kranal_data;
-extern kra_tunables_t  kranal_tunables;
-
-extern void kranal_destroy_peer(kra_peer_t *peer);
-extern void kranal_destroy_conn(kra_conn_t *conn);
-
-static inline void
-kranal_peer_addref(kra_peer_t *peer)
-{
-        CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid));
-       LASSERT(atomic_read(&peer->rap_refcount) > 0);
-       atomic_inc(&peer->rap_refcount);
-}
-
-static inline void
-kranal_peer_decref(kra_peer_t *peer)
-{
-        CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid));
-       LASSERT(atomic_read(&peer->rap_refcount) > 0);
-       if (atomic_dec_and_test(&peer->rap_refcount))
-                kranal_destroy_peer(peer);
-}
-
-static inline cfs_list_t *
-kranal_nid2peerlist (lnet_nid_t nid)
-{
-        unsigned int hash = ((unsigned int)nid) % kranal_data.kra_peer_hash_size;
-
-        return (&kranal_data.kra_peers[hash]);
-}
-
-static inline int
-kranal_peer_active(kra_peer_t *peer)
-{
-        /* Am I in the peer hash table? */
-        return (!cfs_list_empty(&peer->rap_list));
-}
-
-static inline void
-kranal_conn_addref(kra_conn_t *conn)
-{
-        CDEBUG(D_NET, "%p->%s\n", conn, 
-               libcfs_nid2str(conn->rac_peer->rap_nid));
-       LASSERT(atomic_read(&conn->rac_refcount) > 0);
-       atomic_inc(&conn->rac_refcount);
-}
-
-static inline void
-kranal_conn_decref(kra_conn_t *conn)
-{
-        CDEBUG(D_NET, "%p->%s\n", conn,
-               libcfs_nid2str(conn->rac_peer->rap_nid));
-       LASSERT(atomic_read(&conn->rac_refcount) > 0);
-       if (atomic_dec_and_test(&conn->rac_refcount))
-                kranal_destroy_conn(conn);
-}
-
-static inline cfs_list_t *
-kranal_cqid2connlist (__u32 cqid)
-{
-        unsigned int hash = cqid % kranal_data.kra_conn_hash_size;
-
-        return (&kranal_data.kra_conns [hash]);
-}
-
-static inline kra_conn_t *
-kranal_cqid2conn_locked (__u32 cqid)
-{
-        cfs_list_t       *conns = kranal_cqid2connlist(cqid);
-        cfs_list_t       *tmp;
-        kra_conn_t       *conn;
-
-        cfs_list_for_each(tmp, conns) {
-                conn = cfs_list_entry(tmp, kra_conn_t, rac_hashlist);
-
-                if (conn->rac_cqid == cqid)
-                        return conn;
-        }
-
-        return NULL;
-}
-
-static inline int
-kranal_tx_mapped (kra_tx_t *tx)
-{
-        return (tx->tx_buftype == RANAL_BUF_VIRT_MAPPED ||
-                tx->tx_buftype == RANAL_BUF_PHYS_MAPPED);
-}
-
-int kranal_startup (lnet_ni_t *ni);
-void kranal_shutdown (lnet_ni_t *ni);
-int kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kranal_eager_recv(lnet_ni_t *ni, void *private,
-                      lnet_msg_t *lntmsg, void **new_private);
-int kranal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
-                int delayed, unsigned int niov,
-                struct iovec *iov, lnet_kiov_t *kiov,
-                unsigned int offset, unsigned int mlen, unsigned int rlen);
-int kranal_accept(lnet_ni_t *ni, struct socket *sock);
-
-extern void kranal_free_acceptsock (kra_acceptsock_t *ras);
-extern int kranal_listener_procint (struct ctl_table *table,
-                                    int write, struct file *filp,
-                                    void *buffer, size_t *lenp);
-extern void kranal_update_reaper_timeout (long timeout);
-extern void kranal_tx_done (kra_tx_t *tx, int completion);
-extern void kranal_unlink_peer_locked (kra_peer_t *peer);
-extern void kranal_schedule_conn (kra_conn_t *conn);
-extern int kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid);
-extern int kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port);
-extern kra_peer_t *kranal_find_peer_locked (lnet_nid_t nid);
-extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx);
-extern int kranal_del_peer (lnet_nid_t nid);
-extern void kranal_device_callback (RAP_INT32 devid, RAP_PVOID arg);
-extern int kranal_thread_start(int(*fn)(void *arg), void *arg, char *name);
-extern int kranal_connd (void *arg);
-extern int kranal_reaper (void *arg);
-extern int kranal_scheduler (void *arg);
-extern void kranal_close_conn_locked (kra_conn_t *conn, int error);
-extern void kranal_close_conn (kra_conn_t *conn, int error);
-extern void kranal_terminate_conn_locked (kra_conn_t *conn);
-extern void kranal_connect (kra_peer_t *peer);
-extern int kranal_conn_handshake (struct socket *sock, kra_peer_t *peer);
-extern int kranal_tunables_init(void);
-extern void kranal_tunables_fini(void);
-extern void kranal_init_msg(kra_msg_t *msg, int type);
diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c
deleted file mode 100644 (file)
index f53be8d..0000000
+++ /dev/null
@@ -1,2078 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, 2014, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ralnd/ralnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include <asm/page.h>
-#include "ralnd.h"
-
-void
-kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg)
-{
-        kra_device_t *dev;
-        int           i;
-        unsigned long flags;
-
-        CDEBUG(D_NET, "callback for device %d\n", devid);
-
-        for (i = 0; i < kranal_data.kra_ndevs; i++) {
-
-                dev = &kranal_data.kra_devices[i];
-                if (dev->rad_id != devid)
-                        continue;
-
-               spin_lock_irqsave(&dev->rad_lock, flags);
-
-               if (!dev->rad_ready) {
-                       dev->rad_ready = 1;
-                       wake_up(&dev->rad_waitq);
-               }
-
-               spin_unlock_irqrestore(&dev->rad_lock, flags);
-                return;
-        }
-
-        CWARN("callback for unknown device %d\n", devid);
-}
-
-void
-kranal_schedule_conn(kra_conn_t *conn)
-{
-        kra_device_t    *dev = conn->rac_device;
-        unsigned long    flags;
-
-       spin_lock_irqsave(&dev->rad_lock, flags);
-
-       if (!conn->rac_scheduled) {
-               kranal_conn_addref(conn);       /* +1 ref for scheduler */
-               conn->rac_scheduled = 1;
-               cfs_list_add_tail(&conn->rac_schedlist, &dev->rad_ready_conns);
-               wake_up(&dev->rad_waitq);
-       }
-
-       spin_unlock_irqrestore(&dev->rad_lock, flags);
-}
-
-kra_tx_t *
-kranal_get_idle_tx (void)
-{
-        unsigned long  flags;
-        kra_tx_t      *tx;
-
-       spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
-
-        if (cfs_list_empty(&kranal_data.kra_idle_txs)) {
-               spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-                return NULL;
-        }
-
-        tx = cfs_list_entry(kranal_data.kra_idle_txs.next, kra_tx_t, tx_list);
-        cfs_list_del(&tx->tx_list);
-
-        /* Allocate a new completion cookie.  It might not be needed, but we've
-         * got a lock right now... */
-        tx->tx_cookie = kranal_data.kra_next_tx_cookie++;
-
-       spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-
-        LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-        LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE);
-        LASSERT (tx->tx_conn == NULL);
-        LASSERT (tx->tx_lntmsg[0] == NULL);
-        LASSERT (tx->tx_lntmsg[1] == NULL);
-
-        return tx;
-}
-
-void
-kranal_init_msg(kra_msg_t *msg, int type)
-{
-        msg->ram_magic = RANAL_MSG_MAGIC;
-        msg->ram_version = RANAL_MSG_VERSION;
-        msg->ram_type = type;
-        msg->ram_srcnid = kranal_data.kra_ni->ni_nid;
-        /* ram_connstamp gets set when FMA is sent */
-}
-
-kra_tx_t *
-kranal_new_tx_msg (int type)
-{
-        kra_tx_t *tx = kranal_get_idle_tx();
-
-        if (tx != NULL)
-                kranal_init_msg(&tx->tx_msg, type);
-
-        return tx;
-}
-
-int
-kranal_setup_immediate_buffer (kra_tx_t *tx, 
-                               unsigned int niov, struct iovec *iov,
-                               int offset, int nob)
-
-{
-        /* For now this is almost identical to kranal_setup_virt_buffer, but we
-         * could "flatten" the payload into a single contiguous buffer ready
-         * for sending direct over an FMA if we ever needed to. */
-
-        LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-        LASSERT (nob >= 0);
-
-        if (nob == 0) {
-                tx->tx_buffer = NULL;
-        } else {
-                LASSERT (niov > 0);
-
-                while (offset >= iov->iov_len) {
-                        offset -= iov->iov_len;
-                        niov--;
-                        iov++;
-                        LASSERT (niov > 0);
-                }
-
-                if (nob > iov->iov_len - offset) {
-                        CERROR("Can't handle multiple vaddr fragments\n");
-                        return -EMSGSIZE;
-                }
-
-                tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset);
-        }
-
-        tx->tx_buftype = RANAL_BUF_IMMEDIATE;
-        tx->tx_nob = nob;
-        return 0;
-}
-
-int
-kranal_setup_virt_buffer (kra_tx_t *tx, 
-                          unsigned int niov, struct iovec *iov,
-                          int offset, int nob)
-
-{
-        LASSERT (nob > 0);
-        LASSERT (niov > 0);
-        LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                niov--;
-                iov++;
-                LASSERT (niov > 0);
-        }
-
-        if (nob > iov->iov_len - offset) {
-                CERROR("Can't handle multiple vaddr fragments\n");
-                return -EMSGSIZE;
-        }
-
-        tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED;
-        tx->tx_nob = nob;
-        tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset);
-        return 0;
-}
-
-int
-kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, lnet_kiov_t *kiov,
-                          int offset, int nob)
-{
-        RAP_PHYS_REGION *phys = tx->tx_phys;
-        int              resid;
-
-        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-        LASSERT (nob > 0);
-        LASSERT (nkiov > 0);
-        LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                nkiov--;
-                kiov++;
-                LASSERT (nkiov > 0);
-        }
-
-        tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
-        tx->tx_nob = nob;
-        tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset));
-
-       phys->Address = page_to_phys(kiov->kiov_page);
-        phys++;
-
-        resid = nob - (kiov->kiov_len - offset);
-        while (resid > 0) {
-                kiov++;
-                nkiov--;
-                LASSERT (nkiov > 0);
-
-                if (kiov->kiov_offset != 0 ||
-                    ((resid > PAGE_SIZE) &&
-                     kiov->kiov_len < PAGE_SIZE)) {
-                        /* Can't have gaps */
-                        CERROR("Can't make payload contiguous in I/O VM:"
-                               "page %d, offset %d, len %d \n",
-                               (int)(phys - tx->tx_phys),
-                               kiov->kiov_offset, kiov->kiov_len);
-                        return -EINVAL;
-                }
-
-                if ((phys - tx->tx_phys) == LNET_MAX_IOV) {
-                        CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys));
-                        return -EMSGSIZE;
-                }
-
-               phys->Address = page_to_phys(kiov->kiov_page);
-                phys++;
-
-                resid -= PAGE_SIZE;
-        }
-
-        tx->tx_phys_npages = phys - tx->tx_phys;
-        return 0;
-}
-
-static inline int
-kranal_setup_rdma_buffer (kra_tx_t *tx, unsigned int niov,
-                          struct iovec *iov, lnet_kiov_t *kiov,
-                          int offset, int nob)
-{
-        LASSERT ((iov == NULL) != (kiov == NULL));
-
-        if (kiov != NULL)
-                return kranal_setup_phys_buffer(tx, niov, kiov, offset, nob);
-
-        return kranal_setup_virt_buffer(tx, niov, iov, offset, nob);
-}
-
-int
-kranal_map_buffer (kra_tx_t *tx)
-{
-        kra_conn_t     *conn = tx->tx_conn;
-        kra_device_t   *dev = conn->rac_device;
-        RAP_RETURN      rrc;
-
-        LASSERT (current == dev->rad_scheduler);
-
-        switch (tx->tx_buftype) {
-        default:
-                LBUG();
-
-        case RANAL_BUF_NONE:
-        case RANAL_BUF_IMMEDIATE:
-        case RANAL_BUF_PHYS_MAPPED:
-        case RANAL_BUF_VIRT_MAPPED:
-                return 0;
-
-        case RANAL_BUF_PHYS_UNMAPPED:
-                rrc = RapkRegisterPhys(dev->rad_handle,
-                                       tx->tx_phys, tx->tx_phys_npages,
-                                       &tx->tx_map_key);
-                if (rrc != RAP_SUCCESS) {
-                        CERROR ("Can't map %d pages: dev %d "
-                                "phys %u pp %u, virt %u nob %lu\n",
-                                tx->tx_phys_npages, dev->rad_id, 
-                                dev->rad_nphysmap, dev->rad_nppphysmap,
-                                dev->rad_nvirtmap, dev->rad_nobvirtmap);
-                        return -ENOMEM; /* assume insufficient resources */
-                }
-
-                dev->rad_nphysmap++;
-                dev->rad_nppphysmap += tx->tx_phys_npages;
-
-                tx->tx_buftype = RANAL_BUF_PHYS_MAPPED;
-                return 0;
-
-        case RANAL_BUF_VIRT_UNMAPPED:
-                rrc = RapkRegisterMemory(dev->rad_handle,
-                                         tx->tx_buffer, tx->tx_nob,
-                                         &tx->tx_map_key);
-                if (rrc != RAP_SUCCESS) {
-                        CERROR ("Can't map %d bytes: dev %d "
-                                "phys %u pp %u, virt %u nob %lu\n",
-                                tx->tx_nob, dev->rad_id, 
-                                dev->rad_nphysmap, dev->rad_nppphysmap,
-                                dev->rad_nvirtmap, dev->rad_nobvirtmap);
-                        return -ENOMEM; /* assume insufficient resources */
-                }
-
-                dev->rad_nvirtmap++;
-                dev->rad_nobvirtmap += tx->tx_nob;
-
-                tx->tx_buftype = RANAL_BUF_VIRT_MAPPED;
-                return 0;
-        }
-}
-
-void
-kranal_unmap_buffer (kra_tx_t *tx)
-{
-        kra_device_t   *dev;
-        RAP_RETURN      rrc;
-
-        switch (tx->tx_buftype) {
-        default:
-                LBUG();
-
-        case RANAL_BUF_NONE:
-        case RANAL_BUF_IMMEDIATE:
-        case RANAL_BUF_PHYS_UNMAPPED:
-        case RANAL_BUF_VIRT_UNMAPPED:
-                break;
-
-        case RANAL_BUF_PHYS_MAPPED:
-                LASSERT (tx->tx_conn != NULL);
-                dev = tx->tx_conn->rac_device;
-                LASSERT (current == dev->rad_scheduler);
-                rrc = RapkDeregisterMemory(dev->rad_handle, NULL,
-                                           &tx->tx_map_key);
-                LASSERT (rrc == RAP_SUCCESS);
-
-                dev->rad_nphysmap--;
-                dev->rad_nppphysmap -= tx->tx_phys_npages;
-
-                tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
-                break;
-
-        case RANAL_BUF_VIRT_MAPPED:
-                LASSERT (tx->tx_conn != NULL);
-                dev = tx->tx_conn->rac_device;
-                LASSERT (current == dev->rad_scheduler);
-                rrc = RapkDeregisterMemory(dev->rad_handle, tx->tx_buffer,
-                                           &tx->tx_map_key);
-                LASSERT (rrc == RAP_SUCCESS);
-
-                dev->rad_nvirtmap--;
-                dev->rad_nobvirtmap -= tx->tx_nob;
-
-                tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED;
-                break;
-        }
-}
-
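The two functions above amount to a small state machine over tx_buftype: a buffer moves from *_UNMAPPED to *_MAPPED when it is registered with the device and back again when it is deregistered, and the per-device counters (rad_nphysmap, rad_nobvirtmap, ...) must stay balanced across the two transitions. Below is a minimal standalone sketch of that bookkeeping pattern in plain userspace C; the names are invented for illustration and are not part of the removed driver.

#include <assert.h>
#include <stdio.h>

/* Illustrative only: mirrors the *_UNMAPPED <-> *_MAPPED bookkeeping above. */
enum buf_state { BUF_UNMAPPED, BUF_MAPPED };

struct dev_stats { int nmapped; long nbytes; };

static void map_buf(struct dev_stats *dev, enum buf_state *state, long nob)
{
        if (*state == BUF_MAPPED)       /* already mapped: nothing to do */
                return;
        dev->nmapped++;                 /* account, then flip the state */
        dev->nbytes += nob;
        *state = BUF_MAPPED;
}

static void unmap_buf(struct dev_stats *dev, enum buf_state *state, long nob)
{
        if (*state != BUF_MAPPED)       /* unmapped/immediate: nothing to undo */
                return;
        dev->nmapped--;
        dev->nbytes -= nob;
        *state = BUF_UNMAPPED;
}

int main(void)
{
        struct dev_stats dev = { 0, 0 };
        enum buf_state st = BUF_UNMAPPED;

        map_buf(&dev, &st, 4096);
        unmap_buf(&dev, &st, 4096);
        assert(dev.nmapped == 0 && dev.nbytes == 0);
        printf("counters balanced: %d mappings, %ld bytes\n",
               dev.nmapped, dev.nbytes);
        return 0;
}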
-void
-kranal_tx_done (kra_tx_t *tx, int completion)
-{
-       lnet_msg_t      *lnetmsg[2];
-       unsigned long    flags;
-       int              i;
-
-       LASSERT (!in_interrupt());
-
-       kranal_unmap_buffer(tx);
-
-       lnetmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL;
-       lnetmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL;
-
-       tx->tx_buftype = RANAL_BUF_NONE;
-       tx->tx_msg.ram_type = RANAL_MSG_NONE;
-       tx->tx_conn = NULL;
-
-       spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
-
-       cfs_list_add_tail(&tx->tx_list, &kranal_data.kra_idle_txs);
-
-       spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-
-       /* finalize AFTER freeing lnet msgs */
-       for (i = 0; i < 2; i++) {
-               if (lnetmsg[i] == NULL)
-                       continue;
-
-               lnet_finalize(kranal_data.kra_ni, lnetmsg[i], completion);
-       }
-}
-
-kra_conn_t *
-kranal_find_conn_locked (kra_peer_t *peer)
-{
-        cfs_list_t *tmp;
-
-        /* just return the first connection */
-        cfs_list_for_each (tmp, &peer->rap_conns) {
-                return cfs_list_entry(tmp, kra_conn_t, rac_list);
-        }
-
-        return NULL;
-}
-
-void
-kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx)
-{
-        unsigned long    flags;
-
-        tx->tx_conn = conn;
-
-       spin_lock_irqsave(&conn->rac_lock, flags);
-        cfs_list_add_tail(&tx->tx_list, &conn->rac_fmaq);
-        tx->tx_qtime = jiffies;
-       spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-        kranal_schedule_conn(conn);
-}
-
-void
-kranal_launch_tx (kra_tx_t *tx, lnet_nid_t nid)
-{
-        unsigned long    flags;
-        kra_peer_t      *peer;
-        kra_conn_t      *conn;
-        int              rc;
-        int              retry;
-       rwlock_t    *g_lock = &kranal_data.kra_global_lock;
-
-        /* If I get here, I've committed to send, so I complete the tx with
-         * failure on any problems */
-
-        LASSERT (tx->tx_conn == NULL);      /* only set when assigned a conn */
-
-        for (retry = 0; ; retry = 1) {
-
-               read_lock(g_lock);
-
-                peer = kranal_find_peer_locked(nid);
-                if (peer != NULL) {
-                        conn = kranal_find_conn_locked(peer);
-                        if (conn != NULL) {
-                                kranal_post_fma(conn, tx);
-                               read_unlock(g_lock);
-                                return;
-                        }
-                }
-                
-                /* Making connections; I'll need a write lock... */
-               read_unlock(g_lock);
-               write_lock_irqsave(g_lock, flags);
-
-                peer = kranal_find_peer_locked(nid);
-                if (peer != NULL)
-                        break;
-                
-               write_unlock_irqrestore(g_lock, flags);
-                
-                if (retry) {
-                        CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
-                        kranal_tx_done(tx, -EHOSTUNREACH);
-                        return;
-                }
-
-                rc = kranal_add_persistent_peer(nid, LNET_NIDADDR(nid),
-                                                lnet_acceptor_port());
-                if (rc != 0) {
-                        CERROR("Can't add peer %s: %d\n",
-                               libcfs_nid2str(nid), rc);
-                        kranal_tx_done(tx, rc);
-                        return;
-                }
-        }
-        
-        conn = kranal_find_conn_locked(peer);
-        if (conn != NULL) {
-                /* Connection exists; queue message on it */
-                kranal_post_fma(conn, tx);
-               write_unlock_irqrestore(g_lock, flags);
-                return;
-        }
-                        
-        LASSERT (peer->rap_persistence > 0);
-
-        if (!peer->rap_connecting) {
-                LASSERT (cfs_list_empty(&peer->rap_tx_queue));
-
-                if (!(peer->rap_reconnect_interval == 0 || /* first attempt */
-                      cfs_time_aftereq(jiffies, peer->rap_reconnect_time))) {
-                       write_unlock_irqrestore(g_lock, flags);
-                        kranal_tx_done(tx, -EHOSTUNREACH);
-                        return;
-                }
-
-                peer->rap_connecting = 1;
-                kranal_peer_addref(peer); /* extra ref for connd */
-
-               spin_lock(&kranal_data.kra_connd_lock);
-
-               cfs_list_add_tail(&peer->rap_connd_list,
-                             &kranal_data.kra_connd_peers);
-               wake_up(&kranal_data.kra_connd_waitq);
-
-               spin_unlock(&kranal_data.kra_connd_lock);
-        }
-
-        /* A connection is being established; queue the message... */
-        cfs_list_add_tail(&tx->tx_list, &peer->rap_tx_queue);
-
-       write_unlock_irqrestore(g_lock, flags);
-}
-
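kranal_launch_tx() above uses a classic read-then-write lock pattern: look the peer up under the shared (read) lock on the fast path, and only if that fails drop it, take the write lock, add a persistent peer, and retry exactly once. A rough userspace sketch of that retry shape follows, using a pthreads rwlock and a trivial stand-in for the peer table; all names here are invented for illustration.

#include <pthread.h>
#include <stdio.h>

/* Toy peer "table" guarded by an rwlock; all names are invented. */
static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static int have_peer;                   /* stands in for the peer hash table */

static int find_peer_locked(void) { return have_peer; }
static void add_peer_locked(void)  { have_peer = 1; }

static int launch(void)
{
        int retry;

        for (retry = 0; ; retry = 1) {
                pthread_rwlock_rdlock(&table_lock);     /* cheap fast path */
                if (find_peer_locked()) {
                        /* the driver would queue the tx on a conn here */
                        pthread_rwlock_unlock(&table_lock);
                        return 0;
                }
                pthread_rwlock_unlock(&table_lock);

                if (retry)              /* second miss: give up, like -EHOSTUNREACH */
                        return -1;

                pthread_rwlock_wrlock(&table_lock);     /* slow path: insert */
                add_peer_locked();
                pthread_rwlock_unlock(&table_lock);
        }
}

int main(void)
{
        printf("launch() -> %d\n", launch());
        return 0;
}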
-void
-kranal_rdma(kra_tx_t *tx, int type,
-            kra_rdma_desc_t *sink, int nob, __u64 cookie)
-{
-        kra_conn_t   *conn = tx->tx_conn;
-        RAP_RETURN    rrc;
-        unsigned long flags;
-
-        LASSERT (kranal_tx_mapped(tx));
-        LASSERT (nob <= sink->rard_nob);
-        LASSERT (nob <= tx->tx_nob);
-
-        /* No actual race with scheduler sending CLOSE (I'm she!) */
-        LASSERT (current == conn->rac_device->rad_scheduler);
-
-        memset(&tx->tx_rdma_desc, 0, sizeof(tx->tx_rdma_desc));
-        tx->tx_rdma_desc.SrcPtr.AddressBits = (__u64)((unsigned long)tx->tx_buffer);
-        tx->tx_rdma_desc.SrcKey = tx->tx_map_key;
-        tx->tx_rdma_desc.DstPtr = sink->rard_addr;
-        tx->tx_rdma_desc.DstKey = sink->rard_key;
-        tx->tx_rdma_desc.Length = nob;
-        tx->tx_rdma_desc.AppPtr = tx;
-
-        /* prep final completion message */
-        kranal_init_msg(&tx->tx_msg, type);
-        tx->tx_msg.ram_u.completion.racm_cookie = cookie;
-
-        if (nob == 0) { /* Immediate completion */
-                kranal_post_fma(conn, tx);
-                return;
-        }
-
-        LASSERT (!conn->rac_close_sent); /* Don't lie (CLOSE == RDMA idle) */
-
-        rrc = RapkPostRdma(conn->rac_rihandle, &tx->tx_rdma_desc);
-        LASSERT (rrc == RAP_SUCCESS);
-
-       spin_lock_irqsave(&conn->rac_lock, flags);
-        cfs_list_add_tail(&tx->tx_list, &conn->rac_rdmaq);
-        tx->tx_qtime = jiffies;
-       spin_unlock_irqrestore(&conn->rac_lock, flags);
-}
-
-int
-kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob)
-{
-        __u32      nob_received = nob;
-        RAP_RETURN rrc;
-
-        LASSERT (conn->rac_rxmsg != NULL);
-        CDEBUG(D_NET, "Consuming %p\n", conn);
-
-        rrc = RapkFmaCopyOut(conn->rac_rihandle, buffer,
-                             &nob_received, sizeof(kra_msg_t));
-        LASSERT (rrc == RAP_SUCCESS);
-
-        conn->rac_rxmsg = NULL;
-
-        if (nob_received < nob) {
-                CWARN("Incomplete immediate msg from %s: expected %d, got %d\n",
-                      libcfs_nid2str(conn->rac_peer->rap_nid), 
-                      nob, nob_received);
-                return -EPROTO;
-        }
-
-        return 0;
-}
-
-int
-kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
-        lnet_hdr_t       *hdr = &lntmsg->msg_hdr;
-        int               type = lntmsg->msg_type;
-        lnet_process_id_t target = lntmsg->msg_target;
-        int               target_is_router = lntmsg->msg_target_is_router;
-        int               routing = lntmsg->msg_routing;
-        unsigned int      niov = lntmsg->msg_niov;
-        struct iovec     *iov = lntmsg->msg_iov;
-        lnet_kiov_t      *kiov = lntmsg->msg_kiov;
-        unsigned int      offset = lntmsg->msg_offset;
-        unsigned int      nob = lntmsg->msg_len;
-        kra_tx_t         *tx;
-        int               rc;
-
-        /* NB 'private' is different depending on what we're sending.... */
-
-       CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
-              nob, niov, libcfs_id2str(target));
-
-       LASSERT (nob == 0 || niov > 0);
-       LASSERT (niov <= LNET_MAX_IOV);
-
-       LASSERT (!in_interrupt());
-       /* payload is either all vaddrs or all pages */
-       LASSERT (!(kiov != NULL && iov != NULL));
-
-       if (routing) {
-               CERROR ("Can't route\n");
-               return -EIO;
-       }
-
-        switch(type) {
-        default:
-                LBUG();
-
-        case LNET_MSG_ACK:
-                LASSERT (nob == 0);
-                break;
-
-        case LNET_MSG_GET:
-                LASSERT (niov == 0);
-                LASSERT (nob == 0);
-                /* We have to consider the eventual sink buffer rather than any
-                 * payload passed here (there isn't any, and strictly, looking
-                 * inside lntmsg is a layering violation).  We send a simple
-                 * IMMEDIATE GET if the sink buffer is mapped already and small
-                 * enough for FMA */
-
-                if (routing || target_is_router)
-                        break;                  /* send IMMEDIATE */
-
-                if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0 &&
-                    lntmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA &&
-                    lntmsg->msg_md->md_length <= *kranal_tunables.kra_max_immediate)
-                        break;                  /* send IMMEDIATE */
-
-                tx = kranal_new_tx_msg(RANAL_MSG_GET_REQ);
-                if (tx == NULL)
-                        return -ENOMEM;
-
-                if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
-                        rc = kranal_setup_virt_buffer(tx, lntmsg->msg_md->md_niov,
-                                                      lntmsg->msg_md->md_iov.iov,
-                                                      0, lntmsg->msg_md->md_length);
-                else
-                        rc = kranal_setup_phys_buffer(tx, lntmsg->msg_md->md_niov,
-                                                      lntmsg->msg_md->md_iov.kiov,
-                                                      0, lntmsg->msg_md->md_length);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        return -EIO;
-                }
-
-                tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg);
-                if (tx->tx_lntmsg[1] == NULL) {
-                        CERROR("Can't create reply for GET to %s\n", 
-                               libcfs_nid2str(target.nid));
-                        kranal_tx_done(tx, rc);
-                        return -EIO;
-                }
-
-                tx->tx_lntmsg[0] = lntmsg;
-                tx->tx_msg.ram_u.get.ragm_hdr = *hdr;
-                /* rest of tx_msg is setup just before it is sent */
-                kranal_launch_tx(tx, target.nid);
-                return 0;
-
-        case LNET_MSG_REPLY:
-        case LNET_MSG_PUT:
-                if (kiov == NULL &&             /* not paged */
-                    nob <= RANAL_FMA_MAX_DATA && /* small enough */
-                    nob <= *kranal_tunables.kra_max_immediate)
-                        break;                  /* send IMMEDIATE */
-
-                tx = kranal_new_tx_msg(RANAL_MSG_PUT_REQ);
-                if (tx == NULL)
-                        return -ENOMEM;
-
-                rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        return -EIO;
-                }
-
-                tx->tx_lntmsg[0] = lntmsg;
-                tx->tx_msg.ram_u.putreq.raprm_hdr = *hdr;
-                /* rest of tx_msg is setup just before it is sent */
-                kranal_launch_tx(tx, target.nid);
-                return 0;
-        }
-
-        /* send IMMEDIATE */
-
-        LASSERT (kiov == NULL);
-        LASSERT (nob <= RANAL_FMA_MAX_DATA);
-
-        tx = kranal_new_tx_msg(RANAL_MSG_IMMEDIATE);
-        if (tx == NULL)
-                return -ENOMEM;
-
-        rc = kranal_setup_immediate_buffer(tx, niov, iov, offset, nob);
-        if (rc != 0) {
-                kranal_tx_done(tx, rc);
-                return -EIO;
-        }
-
-        tx->tx_msg.ram_u.immediate.raim_hdr = *hdr;
-        tx->tx_lntmsg[0] = lntmsg;
-        kranal_launch_tx(tx, target.nid);
-        return 0;
-}
-
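The send path above picks between two strategies: small, unpaged payloads are copied inline into the FMA message (IMMEDIATE), while anything paged or larger than both RANAL_FMA_MAX_DATA and the kra_max_immediate tunable goes through the GET_REQ/PUT_REQ handshake and an RDMA transfer. A toy version of that decision is sketched below; the threshold values are illustrative only, not the driver's real constants.

#include <stdio.h>

/* Illustrative thresholds only; the driver uses RANAL_FMA_MAX_DATA and the
 * kra_max_immediate module parameter. */
#define FMA_MAX_DATA   2048
static unsigned int max_immediate = 2048;

/* 1 = copy the payload inline into the FMA message, 0 = set up an RDMA */
static int send_immediate(int paged, unsigned int nob)
{
        return !paged && nob <= FMA_MAX_DATA && nob <= max_immediate;
}

int main(void)
{
        printf("512 bytes, iovec: %s\n",
               send_immediate(0, 512) ? "IMMEDIATE" : "RDMA");
        printf("64K, paged (kiov): %s\n",
               send_immediate(1, 65536) ? "IMMEDIATE" : "RDMA");
        return 0;
}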
-void
-kranal_reply(lnet_ni_t *ni, kra_conn_t *conn, lnet_msg_t *lntmsg)
-{
-        kra_msg_t     *rxmsg = conn->rac_rxmsg;
-        unsigned int   niov = lntmsg->msg_niov;
-        struct iovec  *iov = lntmsg->msg_iov;
-        lnet_kiov_t   *kiov = lntmsg->msg_kiov;
-        unsigned int   offset = lntmsg->msg_offset;
-        unsigned int   nob = lntmsg->msg_len;
-        kra_tx_t      *tx;
-        int            rc;
-
-        tx = kranal_get_idle_tx();
-        if (tx == NULL)
-                goto failed_0;
-
-        rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob);
-        if (rc != 0)
-                goto failed_1;
-
-        tx->tx_conn = conn;
-
-        rc = kranal_map_buffer(tx);
-        if (rc != 0)
-                goto failed_1;
-
-        tx->tx_lntmsg[0] = lntmsg;
-
-        kranal_rdma(tx, RANAL_MSG_GET_DONE,
-                    &rxmsg->ram_u.get.ragm_desc, nob,
-                    rxmsg->ram_u.get.ragm_cookie);
-        return;
-
- failed_1:
-        kranal_tx_done(tx, -EIO);
- failed_0:
-        lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kranal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
-                   void **new_private)
-{
-        kra_conn_t *conn = (kra_conn_t *)private;
-
-        LCONSOLE_ERROR_MSG(0x12b, "Dropping message from %s: no buffers free.\n",
-                           libcfs_nid2str(conn->rac_peer->rap_nid));
-
-        return -EDEADLK;
-}
-
-int
-kranal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
-             int delayed, unsigned int niov, 
-             struct iovec *iov, lnet_kiov_t *kiov,
-             unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
-       kra_conn_t  *conn = private;
-       kra_msg_t   *rxmsg = conn->rac_rxmsg;
-       kra_tx_t    *tx;
-       void        *buffer;
-       int          rc;
-
-       LASSERT (mlen <= rlen);
-       LASSERT (!in_interrupt());
-       /* Either all pages or all vaddrs */
-       LASSERT (!(kiov != NULL && iov != NULL));
-
-       CDEBUG(D_NET, "conn %p, rxmsg %p, lntmsg %p\n", conn, rxmsg, lntmsg);
-
-        switch(rxmsg->ram_type) {
-        default:
-                LBUG();
-
-        case RANAL_MSG_IMMEDIATE:
-                if (mlen == 0) {
-                        buffer = NULL;
-                } else if (kiov != NULL) {
-                        CERROR("Can't recv immediate into paged buffer\n");
-                        return -EIO;
-                } else {
-                        LASSERT (niov > 0);
-                        while (offset >= iov->iov_len) {
-                                offset -= iov->iov_len;
-                                iov++;
-                                niov--;
-                                LASSERT (niov > 0);
-                        }
-                        if (mlen > iov->iov_len - offset) {
-                                CERROR("Can't handle immediate frags\n");
-                                return -EIO;
-                        }
-                        buffer = ((char *)iov->iov_base) + offset;
-                }
-                rc = kranal_consume_rxmsg(conn, buffer, mlen);
-                lnet_finalize(ni, lntmsg, (rc == 0) ? 0 : -EIO);
-                return 0;
-
-        case RANAL_MSG_PUT_REQ:
-                tx = kranal_new_tx_msg(RANAL_MSG_PUT_ACK);
-                if (tx == NULL) {
-                        kranal_consume_rxmsg(conn, NULL, 0);
-                        return -ENOMEM;
-                }
-                
-                rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, mlen);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        kranal_consume_rxmsg(conn, NULL, 0);
-                        return -EIO;
-                }
-
-                tx->tx_conn = conn;
-                rc = kranal_map_buffer(tx);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        kranal_consume_rxmsg(conn, NULL, 0);
-                        return -EIO;
-                }
-
-                tx->tx_msg.ram_u.putack.rapam_src_cookie =
-                        conn->rac_rxmsg->ram_u.putreq.raprm_cookie;
-                tx->tx_msg.ram_u.putack.rapam_dst_cookie = tx->tx_cookie;
-                tx->tx_msg.ram_u.putack.rapam_desc.rard_key = tx->tx_map_key;
-                tx->tx_msg.ram_u.putack.rapam_desc.rard_addr.AddressBits =
-                        (__u64)((unsigned long)tx->tx_buffer);
-                tx->tx_msg.ram_u.putack.rapam_desc.rard_nob = mlen;
-
-                tx->tx_lntmsg[0] = lntmsg; /* finalize this on RDMA_DONE */
-
-                kranal_post_fma(conn, tx);
-                kranal_consume_rxmsg(conn, NULL, 0);
-                return 0;
-
-        case RANAL_MSG_GET_REQ:
-                if (lntmsg != NULL) {
-                        /* Matched! */
-                        kranal_reply(ni, conn, lntmsg);
-                } else {
-                        /* No match */
-                        tx = kranal_new_tx_msg(RANAL_MSG_GET_NAK);
-                        if (tx != NULL) {
-                                tx->tx_msg.ram_u.completion.racm_cookie =
-                                        rxmsg->ram_u.get.ragm_cookie;
-                                kranal_post_fma(conn, tx);
-                        }
-                }
-                kranal_consume_rxmsg(conn, NULL, 0);
-                return 0;
-        }
-}
-
-int
-kranal_thread_start(int(*fn)(void *arg), void *arg, char *name)
-{
-       struct task_struct *task = cfs_thread_run(fn, arg, name);
-
-       if (!IS_ERR(task))
-               atomic_inc(&kranal_data.kra_nthreads);
-       return PTR_ERR(task);
-}
-
-void
-kranal_thread_fini (void)
-{
-       atomic_dec(&kranal_data.kra_nthreads);
-}
-
-int
-kranal_check_conn_timeouts (kra_conn_t *conn)
-{
-        kra_tx_t          *tx;
-        cfs_list_t        *ttmp;
-        unsigned long      flags;
-        long               timeout;
-        unsigned long      now = jiffies;
-
-        LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED ||
-                 conn->rac_state == RANAL_CONN_CLOSING);
-
-       if (!conn->rac_close_sent &&
-           cfs_time_aftereq(now, conn->rac_last_tx +
-                            msecs_to_jiffies(conn->rac_keepalive *
-                                             MSEC_PER_SEC))) {
-               /* not sent in a while; schedule conn so scheduler sends a keepalive */
-               CDEBUG(D_NET, "Scheduling keepalive %p->%s\n",
-                      conn, libcfs_nid2str(conn->rac_peer->rap_nid));
-               kranal_schedule_conn(conn);
-       }
-
-       timeout = msecs_to_jiffies(conn->rac_timeout * MSEC_PER_SEC);
-
-       if (!conn->rac_close_recvd &&
-           cfs_time_aftereq(now, conn->rac_last_rx + timeout)) {
-               CERROR("%s received from %s within %lu seconds\n",
-                      (conn->rac_state == RANAL_CONN_ESTABLISHED) ?
-                      "Nothing" : "CLOSE not",
-                      libcfs_nid2str(conn->rac_peer->rap_nid),
-                      jiffies_to_msecs(now - conn->rac_last_rx)/MSEC_PER_SEC);
-               return -ETIMEDOUT;
-       }
-
-        if (conn->rac_state != RANAL_CONN_ESTABLISHED)
-                return 0;
-
-        /* Check the conn's queues are moving.  These are "belt+braces" checks,
-         * in case of hardware/software errors that make this conn seem
-         * responsive even though it isn't progressing its message queues. */
-
-       spin_lock_irqsave(&conn->rac_lock, flags);
-
-       cfs_list_for_each (ttmp, &conn->rac_fmaq) {
-               tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
-               if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
-                       spin_unlock_irqrestore(&conn->rac_lock, flags);
-                       CERROR("tx on fmaq for %s blocked %lu seconds\n",
-                              libcfs_nid2str(conn->rac_peer->rap_nid),
-                              jiffies_to_msecs(now-tx->tx_qtime)/MSEC_PER_SEC);
-                       return -ETIMEDOUT;
-               }
-       }
-
-       cfs_list_for_each (ttmp, &conn->rac_rdmaq) {
-               tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
-               if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
-                       spin_unlock_irqrestore(&conn->rac_lock, flags);
-                       CERROR("tx on rdmaq for %s blocked %lu seconds\n",
-                              libcfs_nid2str(conn->rac_peer->rap_nid),
-                              jiffies_to_msecs(now-tx->tx_qtime)/MSEC_PER_SEC);
-                       return -ETIMEDOUT;
-               }
-       }
-
-       cfs_list_for_each (ttmp, &conn->rac_replyq) {
-               tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
-               if (cfs_time_aftereq(now, tx->tx_qtime + timeout)) {
-                       spin_unlock_irqrestore(&conn->rac_lock, flags);
-                       CERROR("tx on replyq for %s blocked %lu seconds\n",
-                              libcfs_nid2str(conn->rac_peer->rap_nid),
-                              jiffies_to_msecs(now-tx->tx_qtime)/MSEC_PER_SEC);
-                       return -ETIMEDOUT;
-               }
-       }
-
-       spin_unlock_irqrestore(&conn->rac_lock, flags);
-        return 0;
-}
-
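The timeout checks above all follow one shape: an event is overdue once "now" has reached "last event + interval", with the interval converted from seconds to jiffies and the comparison done wrap-safely via cfs_time_aftereq(). The sketch below shows the same shape with ordinary time_t seconds, purely as an illustration; it ignores wraparound, which the real code must handle.

#include <stdio.h>
#include <time.h>

/* Userspace stand-in for the jiffies arithmetic above: an event is overdue
 * once now >= last + interval.  (The driver's cfs_time_aftereq() comparison
 * is additionally safe across jiffy wrap; this sketch ignores that.) */
static int deadline_expired(time_t now, time_t last, int interval_sec)
{
        return now >= last + interval_sec;
}

int main(void)
{
        time_t now = time(NULL);

        /* last tx 40s ago, 30s keepalive -> time to send a NOOP */
        printf("keepalive due: %d\n", deadline_expired(now, now - 40, 30));
        /* last rx 10s ago, 60s timeout -> connection still healthy */
        printf("rx timed out:  %d\n", deadline_expired(now, now - 10, 60));
        return 0;
}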
-void
-kranal_reaper_check (int idx, unsigned long *min_timeoutp)
-{
-        cfs_list_t        *conns = &kranal_data.kra_conns[idx];
-        cfs_list_t        *ctmp;
-        kra_conn_t        *conn;
-        unsigned long      flags;
-        int                rc;
-
- again:
-        /* NB. We expect to check all the conns and not find any problems, so
-         * we just use a shared lock while we take a look... */
-       read_lock(&kranal_data.kra_global_lock);
-
-        cfs_list_for_each (ctmp, conns) {
-                conn = cfs_list_entry(ctmp, kra_conn_t, rac_hashlist);
-
-                if (conn->rac_timeout < *min_timeoutp )
-                        *min_timeoutp = conn->rac_timeout;
-                if (conn->rac_keepalive < *min_timeoutp )
-                        *min_timeoutp = conn->rac_keepalive;
-
-                rc = kranal_check_conn_timeouts(conn);
-                if (rc == 0)
-                        continue;
-
-                kranal_conn_addref(conn);
-               read_unlock(&kranal_data.kra_global_lock);
-
-                CERROR("Conn to %s, cqid %d timed out\n",
-                       libcfs_nid2str(conn->rac_peer->rap_nid), 
-                       conn->rac_cqid);
-
-               write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                switch (conn->rac_state) {
-                default:
-                        LBUG();
-
-                case RANAL_CONN_ESTABLISHED:
-                        kranal_close_conn_locked(conn, -ETIMEDOUT);
-                        break;
-
-                case RANAL_CONN_CLOSING:
-                        kranal_terminate_conn_locked(conn);
-                        break;
-                }
-
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-
-                kranal_conn_decref(conn);
-
-                /* start again now I've dropped the lock */
-                goto again;
-        }
-
-       read_unlock(&kranal_data.kra_global_lock);
-}
-
-int
-kranal_connd (void *arg)
-{
-       long               id = (long)arg;
-       wait_queue_t     wait;
-       unsigned long      flags;
-       kra_peer_t        *peer;
-       kra_acceptsock_t  *ras;
-       int                did_something;
-
-       cfs_block_allsigs();
-
-       init_waitqueue_entry_current(&wait);
-
-       spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
-       while (!kranal_data.kra_shutdown) {
-               did_something = 0;
-
-               if (!cfs_list_empty(&kranal_data.kra_connd_acceptq)) {
-                       ras = cfs_list_entry(kranal_data.kra_connd_acceptq.next,
-                                            kra_acceptsock_t, ras_list);
-                       cfs_list_del(&ras->ras_list);
-
-                       spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
-                                                  flags);
-
-                       CDEBUG(D_NET,"About to handshake someone\n");
-
-                       kranal_conn_handshake(ras->ras_sock, NULL);
-                       kranal_free_acceptsock(ras);
-
-                       CDEBUG(D_NET,"Finished handshaking someone\n");
-
-                       spin_lock_irqsave(&kranal_data.kra_connd_lock,
-                                             flags);
-                       did_something = 1;
-               }
-
-               if (!cfs_list_empty(&kranal_data.kra_connd_peers)) {
-                       peer = cfs_list_entry(kranal_data.kra_connd_peers.next,
-                                             kra_peer_t, rap_connd_list);
-
-                       cfs_list_del_init(&peer->rap_connd_list);
-                       spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
-                                                  flags);
-
-                       kranal_connect(peer);
-                       kranal_peer_decref(peer);
-
-                       spin_lock_irqsave(&kranal_data.kra_connd_lock,
-                                             flags);
-                       did_something = 1;
-               }
-
-               if (did_something)
-                       continue;
-
-               set_current_state(TASK_INTERRUPTIBLE);
-               add_wait_queue_exclusive(&kranal_data.kra_connd_waitq, &wait);
-
-               spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-               waitq_wait(&wait, TASK_INTERRUPTIBLE);
-
-               set_current_state(TASK_RUNNING);
-               remove_wait_queue(&kranal_data.kra_connd_waitq, &wait);
-
-               spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-       kranal_thread_fini();
-       return 0;
-}
-
-void
-kranal_update_reaper_timeout(long timeout)
-{
-        unsigned long   flags;
-
-        LASSERT (timeout > 0);
-
-       spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
-        if (timeout < kranal_data.kra_new_min_timeout)
-                kranal_data.kra_new_min_timeout = timeout;
-
-       spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-}
-
-int
-kranal_reaper (void *arg)
-{
-       wait_queue_t     wait;
-       unsigned long      flags;
-       long               timeout;
-       int                i;
-       int                conn_entries = kranal_data.kra_conn_hash_size;
-       int                conn_index = 0;
-       int                base_index = conn_entries - 1;
-       unsigned long      next_check_time = jiffies;
-       long               next_min_timeout = MAX_SCHEDULE_TIMEOUT;
-       long               current_min_timeout = 1;
-
-       cfs_block_allsigs();
-
-       init_waitqueue_entry_current(&wait);
-
-       spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
-       while (!kranal_data.kra_shutdown) {
-               /* I wake up every 'p' seconds to check for timeouts on some
-                * more peers.  I try to check every connection 'n' times
-                * within the global minimum of all keepalive and timeout
-                * intervals, to ensure I attend to every connection within
-                * (n+1)/n times its timeout intervals. */
-               const int     p = 1;
-               const int     n = 3;
-               unsigned long min_timeout;
-               int           chunk;
-
-               /* careful with the jiffy wrap... */
-               timeout = (long)(next_check_time - jiffies);
-               if (timeout > 0) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       add_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
-
-                       spin_unlock_irqrestore(&kranal_data.kra_reaper_lock,
-                                                  flags);
-
-                       waitq_timedwait(&wait, TASK_INTERRUPTIBLE,
-                                           timeout);
-
-                       spin_lock_irqsave(&kranal_data.kra_reaper_lock,
-                                             flags);
-
-                       set_current_state(TASK_RUNNING);
-                       remove_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
-                       continue;
-               }
-
-               if (kranal_data.kra_new_min_timeout !=
-                   MAX_SCHEDULE_TIMEOUT) {
-                       /* new min timeout set: restart min timeout scan */
-                       next_min_timeout = MAX_SCHEDULE_TIMEOUT;
-                       base_index = conn_index - 1;
-                       if (base_index < 0)
-                               base_index = conn_entries - 1;
-
-                       if (kranal_data.kra_new_min_timeout <
-                           current_min_timeout) {
-                               current_min_timeout =
-                                       kranal_data.kra_new_min_timeout;
-                               CDEBUG(D_NET, "Set new min timeout %ld\n",
-                                      current_min_timeout);
-                       }
-
-                       kranal_data.kra_new_min_timeout =
-                               MAX_SCHEDULE_TIMEOUT;
-               }
-               min_timeout = current_min_timeout;
-
-               spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
-               LASSERT (min_timeout > 0);
-
-               /* Compute how many table entries to check now so I get round
-                * the whole table fast enough given that I do this at fixed
-                * intervals of 'p' seconds */
-               chunk = conn_entries;
-               if (min_timeout > n * p)
-                       chunk = (chunk * n * p) / min_timeout;
-               if (chunk == 0)
-                       chunk = 1;
-
-               for (i = 0; i < chunk; i++) {
-                       kranal_reaper_check(conn_index,
-                                           &next_min_timeout);
-                       conn_index = (conn_index + 1) % conn_entries;
-               }
-
-               next_check_time += msecs_to_jiffies(p * MSEC_PER_SEC);
-
-               spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
-               if (((conn_index - chunk <= base_index &&
-                     base_index < conn_index) ||
-                    (conn_index - conn_entries - chunk <= base_index &&
-                     base_index < conn_index - conn_entries))) {
-
-                       /* Scanned all conns: set current_min_timeout... */
-                       if (current_min_timeout != next_min_timeout) {
-                               current_min_timeout = next_min_timeout;
-                               CDEBUG(D_NET, "Set new min timeout %ld\n",
-                                      current_min_timeout);
-                       }
-
-                       /* ...and restart min timeout scan */
-                       next_min_timeout = MAX_SCHEDULE_TIMEOUT;
-                       base_index = conn_index - 1;
-                       if (base_index < 0)
-                               base_index = conn_entries - 1;
-               }
-       }
-
-       kranal_thread_fini();
-       return 0;
-}
-
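The reaper above wakes every p seconds and scans only a chunk of the connection hash: chunk = conn_entries when the minimum timeout is small, otherwise chunk = conn_entries * n * p / min_timeout, so that each bucket is visited roughly n times per minimum timeout interval. A worked example of that arithmetic follows; the hash size and timeout used are illustrative, not the driver's defaults.

#include <stdio.h>

/* How many hash buckets the reaper scans per p-second wakeup so that every
 * bucket is visited about n times per minimum timeout. */
static int reaper_chunk(int conn_entries, int n, int p, long min_timeout)
{
        int chunk = conn_entries;

        if (min_timeout > (long)(n * p))
                chunk = (int)(((long)chunk * n * p) / min_timeout);
        return chunk > 0 ? chunk : 1;
}

int main(void)
{
        /* e.g. 256 buckets, n=3, p=1s, 60s minimum timeout: 12 buckets per
         * wakeup, so the whole table is covered roughly every 21s, i.e.
         * about 3 times within one 60s timeout interval */
        printf("chunk = %d\n", reaper_chunk(256, 3, 1, 60));
        return 0;
}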
-void
-kranal_check_rdma_cq (kra_device_t *dev)
-{
-        kra_conn_t          *conn;
-        kra_tx_t            *tx;
-        RAP_RETURN           rrc;
-        unsigned long        flags;
-        RAP_RDMA_DESCRIPTOR *desc;
-        __u32                cqid;
-        __u32                event_type;
-
-        for (;;) {
-                rrc = RapkCQDone(dev->rad_rdma_cqh, &cqid, &event_type);
-                if (rrc == RAP_NOT_DONE) {
-                        CDEBUG(D_NET, "RDMA CQ %d empty\n", dev->rad_id);
-                        return;
-                }
-
-                LASSERT (rrc == RAP_SUCCESS);
-                LASSERT ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0);
-
-               read_lock(&kranal_data.kra_global_lock);
-
-                conn = kranal_cqid2conn_locked(cqid);
-                if (conn == NULL) {
-                        /* Conn was destroyed? */
-                        CDEBUG(D_NET, "RDMA CQID lookup %d failed\n", cqid);
-                       read_unlock(&kranal_data.kra_global_lock);
-                        continue;
-                }
-
-                rrc = RapkRdmaDone(conn->rac_rihandle, &desc);
-                LASSERT (rrc == RAP_SUCCESS);
-
-                CDEBUG(D_NET, "Completed %p\n",
-                       cfs_list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list));
-
-               spin_lock_irqsave(&conn->rac_lock, flags);
-
-                LASSERT (!cfs_list_empty(&conn->rac_rdmaq));
-                tx = cfs_list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list);
-                cfs_list_del(&tx->tx_list);
-
-                LASSERT(desc->AppPtr == (void *)tx);
-                LASSERT(tx->tx_msg.ram_type == RANAL_MSG_PUT_DONE ||
-                        tx->tx_msg.ram_type == RANAL_MSG_GET_DONE);
-
-                cfs_list_add_tail(&tx->tx_list, &conn->rac_fmaq);
-                tx->tx_qtime = jiffies;
-
-               spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-                /* Get conn's fmaq processed, now I've just put something
-                 * there */
-                kranal_schedule_conn(conn);
-
-               read_unlock(&kranal_data.kra_global_lock);
-        }
-}
-
-void
-kranal_check_fma_cq (kra_device_t *dev)
-{
-        kra_conn_t         *conn;
-        RAP_RETURN          rrc;
-        __u32               cqid;
-        __u32               event_type;
-        cfs_list_t         *conns;
-        cfs_list_t         *tmp;
-        int                 i;
-
-        for (;;) {
-                rrc = RapkCQDone(dev->rad_fma_cqh, &cqid, &event_type);
-                if (rrc == RAP_NOT_DONE) {
-                        CDEBUG(D_NET, "FMA CQ %d empty\n", dev->rad_id);
-                        return;
-                }
-
-                LASSERT (rrc == RAP_SUCCESS);
-
-                if ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0) {
-
-                       read_lock(&kranal_data.kra_global_lock);
-
-                        conn = kranal_cqid2conn_locked(cqid);
-                        if (conn == NULL) {
-                                CDEBUG(D_NET, "FMA CQID lookup %d failed\n",
-                                       cqid);
-                        } else {
-                                CDEBUG(D_NET, "FMA completed: %p CQID %d\n",
-                                       conn, cqid);
-                                kranal_schedule_conn(conn);
-                        }
-
-                       read_unlock(&kranal_data.kra_global_lock);
-                        continue;
-                }
-
-                /* FMA CQ has overflowed: check ALL conns */
-                CWARN("FMA CQ overflow: scheduling ALL conns on device %d\n", 
-                      dev->rad_id);
-
-                for (i = 0; i < kranal_data.kra_conn_hash_size; i++) {
-
-                       read_lock(&kranal_data.kra_global_lock);
-
-                        conns = &kranal_data.kra_conns[i];
-
-                        cfs_list_for_each (tmp, conns) {
-                                conn = cfs_list_entry(tmp, kra_conn_t,
-                                                      rac_hashlist);
-
-                                if (conn->rac_device == dev)
-                                        kranal_schedule_conn(conn);
-                        }
-
-                        /* don't block write lockers for too long... */
-                       read_unlock(&kranal_data.kra_global_lock);
-                }
-        }
-}
-
-int
-kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg,
-               void *immediate, int immediatenob)
-{
-        int        sync = (msg->ram_type & RANAL_MSG_FENCE) != 0;
-        RAP_RETURN rrc;
-
-        CDEBUG(D_NET,"%p sending msg %p %02x%s [%p for %d]\n",
-               conn, msg, msg->ram_type, sync ? "(sync)" : "",
-               immediate, immediatenob);
-
-        LASSERT (sizeof(*msg) <= RANAL_FMA_MAX_PREFIX);
-        LASSERT ((msg->ram_type == RANAL_MSG_IMMEDIATE) ?
-                 immediatenob <= RANAL_FMA_MAX_DATA :
-                 immediatenob == 0);
-
-        msg->ram_connstamp = conn->rac_my_connstamp;
-        msg->ram_seq = conn->rac_tx_seq;
-
-        if (sync)
-                rrc = RapkFmaSyncSend(conn->rac_rihandle,
-                                      immediate, immediatenob,
-                                      msg, sizeof(*msg));
-        else
-                rrc = RapkFmaSend(conn->rac_rihandle,
-                                  immediate, immediatenob,
-                                  msg, sizeof(*msg));
-
-        switch (rrc) {
-        default:
-                LBUG();
-
-        case RAP_SUCCESS:
-                conn->rac_last_tx = jiffies;
-                conn->rac_tx_seq++;
-                return 0;
-
-        case RAP_NOT_DONE:
-               if (cfs_time_aftereq(jiffies,
-                                    conn->rac_last_tx +
-                                    msecs_to_jiffies(conn->rac_keepalive *
-                                                     MSEC_PER_SEC)))
-                       CWARN("EAGAIN sending %02x (idle %lu secs)\n",
-                             msg->ram_type,
-                             jiffies_to_msecs(jiffies - conn->rac_last_tx) /
-                             MSEC_PER_SEC);
-               return -EAGAIN;
-        }
-}
-
-void
-kranal_process_fmaq (kra_conn_t *conn)
-{
-        unsigned long flags;
-        int           more_to_do;
-        kra_tx_t     *tx;
-        int           rc;
-        int           expect_reply;
-
-        /* NB 1. kranal_sendmsg() may fail if I'm out of credits right now.
-         *       However I will be rescheduled by an FMA completion event
-         *       when I eventually get some.
-         * NB 2. Sampling rac_state here races with setting it elsewhere.
-         *       But it doesn't matter if I try to send a "real" message just
-         *       as I start closing because I'll get scheduled to send the
-         *       close anyway. */
-
-        /* Not racing with incoming message processing! */
-        LASSERT (current == conn->rac_device->rad_scheduler);
-
-        if (conn->rac_state != RANAL_CONN_ESTABLISHED) {
-                if (!cfs_list_empty(&conn->rac_rdmaq)) {
-                        /* RDMAs in progress */
-                        LASSERT (!conn->rac_close_sent);
-
-                       if (cfs_time_aftereq(jiffies,
-                                            conn->rac_last_tx +
-                                            msecs_to_jiffies(conn->rac_keepalive *
-                                                             MSEC_PER_SEC))) {
-                               CDEBUG(D_NET, "sending NOOP (rdma in progress)\n");
-                               kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
-                               kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
-                       }
-                        return;
-                }
-
-                if (conn->rac_close_sent)
-                        return;
-
-                CWARN("sending CLOSE to %s\n", 
-                      libcfs_nid2str(conn->rac_peer->rap_nid));
-                kranal_init_msg(&conn->rac_msg, RANAL_MSG_CLOSE);
-                rc = kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
-                if (rc != 0)
-                        return;
-
-                conn->rac_close_sent = 1;
-                if (!conn->rac_close_recvd)
-                        return;
-
-               write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                if (conn->rac_state == RANAL_CONN_CLOSING)
-                        kranal_terminate_conn_locked(conn);
-
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-                return;
-        }
-
-       spin_lock_irqsave(&conn->rac_lock, flags);
-
-        if (cfs_list_empty(&conn->rac_fmaq)) {
-
-               spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-               if (cfs_time_aftereq(jiffies,
-                                    conn->rac_last_tx +
-                                    msecs_to_jiffies(conn->rac_keepalive *
-                                                     MSEC_PER_SEC))) {
-                       CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n",
-                              libcfs_nid2str(conn->rac_peer->rap_nid), conn,
-                              jiffies_to_msecs(jiffies - conn->rac_last_tx) /
-                              MSEC_PER_SEC,
-                              conn->rac_keepalive);
-                       kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
-                       kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
-               }
-                return;
-        }
-
-        tx = cfs_list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
-        cfs_list_del(&tx->tx_list);
-        more_to_do = !cfs_list_empty(&conn->rac_fmaq);
-
-       spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-        expect_reply = 0;
-        CDEBUG(D_NET, "sending regular msg: %p, type %02x, cookie "LPX64"\n",
-               tx, tx->tx_msg.ram_type, tx->tx_cookie);
-        switch (tx->tx_msg.ram_type) {
-        default:
-                LBUG();
-
-        case RANAL_MSG_IMMEDIATE:
-                rc = kranal_sendmsg(conn, &tx->tx_msg,
-                                    tx->tx_buffer, tx->tx_nob);
-                break;
-
-        case RANAL_MSG_PUT_NAK:
-        case RANAL_MSG_PUT_DONE:
-        case RANAL_MSG_GET_NAK:
-        case RANAL_MSG_GET_DONE:
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                break;
-
-        case RANAL_MSG_PUT_REQ:
-                rc = kranal_map_buffer(tx);
-                LASSERT (rc != -EAGAIN);
-                if (rc != 0)
-                        break;
-
-                tx->tx_msg.ram_u.putreq.raprm_cookie = tx->tx_cookie;
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                expect_reply = 1;
-                break;
-
-        case RANAL_MSG_PUT_ACK:
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                expect_reply = 1;
-                break;
-
-        case RANAL_MSG_GET_REQ:
-                rc = kranal_map_buffer(tx);
-                LASSERT (rc != -EAGAIN);
-                if (rc != 0)
-                        break;
-
-                tx->tx_msg.ram_u.get.ragm_cookie = tx->tx_cookie;
-                tx->tx_msg.ram_u.get.ragm_desc.rard_key = tx->tx_map_key;
-                tx->tx_msg.ram_u.get.ragm_desc.rard_addr.AddressBits =
-                        (__u64)((unsigned long)tx->tx_buffer);
-                tx->tx_msg.ram_u.get.ragm_desc.rard_nob = tx->tx_nob;
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                expect_reply = 1;
-                break;
-        }
-
-        if (rc == -EAGAIN) {
-                /* I need credits to send this.  Replace tx at the head of the
-                 * fmaq and I'll get rescheduled when credits appear */
-                CDEBUG(D_NET, "EAGAIN on %p\n", conn);
-               spin_lock_irqsave(&conn->rac_lock, flags);
-                cfs_list_add(&tx->tx_list, &conn->rac_fmaq);
-               spin_unlock_irqrestore(&conn->rac_lock, flags);
-                return;
-        }
-
-        if (!expect_reply || rc != 0) {
-                kranal_tx_done(tx, rc);
-        } else {
-                /* LASSERT(current) above ensures this doesn't race with reply
-                 * processing */
-               spin_lock_irqsave(&conn->rac_lock, flags);
-                cfs_list_add_tail(&tx->tx_list, &conn->rac_replyq);
-                tx->tx_qtime = jiffies;
-               spin_unlock_irqrestore(&conn->rac_lock, flags);
-        }
-
-        if (more_to_do) {
-                CDEBUG(D_NET, "Rescheduling %p (more to do)\n", conn);
-                kranal_schedule_conn(conn);
-        }
-}
-
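One detail worth noting in kranal_process_fmaq() above: when kranal_sendmsg() returns -EAGAIN the tx goes back at the head of the fmaq (cfs_list_add, not cfs_list_add_tail), so message ordering is preserved while the connection waits to be rescheduled by an FMA completion. A toy illustration of that "leave it at the head" behaviour, with invented names and no real transport:

#include <stdio.h>

/* -1 plays the role of -EAGAIN ("no FMA credit right now"). */
static int try_send(int msg, int have_credit)
{
        (void)msg;
        return have_credit ? 0 : -1;
}

int main(void)
{
        int fmaq[3] = { 10, 11, 12 };
        int head = 0, credits = 0;

        while (head < 3) {
                if (try_send(fmaq[head], credits) != 0) {
                        printf("msg %d: no credit, stays at the head\n",
                               fmaq[head]);
                        credits = 1;    /* pretend a completion restored credit */
                        continue;       /* head unchanged == re-queued in place */
                }
                printf("msg %d sent\n", fmaq[head]);
                head++;
        }
        return 0;
}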
-static inline void
-kranal_swab_rdma_desc (kra_rdma_desc_t *d)
-{
-        __swab64s(&d->rard_key.Key);
-        __swab16s(&d->rard_key.Cookie);
-        __swab16s(&d->rard_key.MdHandle);
-        __swab32s(&d->rard_key.Flags);
-        __swab64s(&d->rard_addr.AddressBits);
-        __swab32s(&d->rard_nob);
-}
-
-kra_tx_t *
-kranal_match_reply(kra_conn_t *conn, int type, __u64 cookie)
-{
-        cfs_list_t       *ttmp;
-        kra_tx_t         *tx;
-        unsigned long     flags;
-
-       spin_lock_irqsave(&conn->rac_lock, flags);
-
-        cfs_list_for_each(ttmp, &conn->rac_replyq) {
-                tx = cfs_list_entry(ttmp, kra_tx_t, tx_list);
-
-                CDEBUG(D_NET,"Checking %p %02x/"LPX64"\n",
-                       tx, tx->tx_msg.ram_type, tx->tx_cookie);
-
-                if (tx->tx_cookie != cookie)
-                        continue;
-
-                if (tx->tx_msg.ram_type != type) {
-                       spin_unlock_irqrestore(&conn->rac_lock, flags);
-                        CWARN("Unexpected type %x (%x expected) "
-                              "matched reply from %s\n",
-                              tx->tx_msg.ram_type, type,
-                              libcfs_nid2str(conn->rac_peer->rap_nid));
-                        return NULL;
-                }
-
-                cfs_list_del(&tx->tx_list);
-               spin_unlock_irqrestore(&conn->rac_lock, flags);
-                return tx;
-        }
-
-       spin_unlock_irqrestore(&conn->rac_lock, flags);
-        CWARN("Unmatched reply %02x/"LPX64" from %s\n",
-              type, cookie, libcfs_nid2str(conn->rac_peer->rap_nid));
-        return NULL;
-}
-
-void
-kranal_check_fma_rx (kra_conn_t *conn)
-{
-        unsigned long flags;
-        __u32         seq;
-        kra_tx_t     *tx;
-        kra_msg_t    *msg;
-        void         *prefix;
-        RAP_RETURN    rrc = RapkFmaGetPrefix(conn->rac_rihandle, &prefix);
-        kra_peer_t   *peer = conn->rac_peer;
-        int           rc = 0;
-        int           repost = 1;
-
-        if (rrc == RAP_NOT_DONE)
-                return;
-
-        CDEBUG(D_NET, "RX on %p\n", conn);
-
-        LASSERT (rrc == RAP_SUCCESS);
-        conn->rac_last_rx = jiffies;
-        seq = conn->rac_rx_seq++;
-        msg = (kra_msg_t *)prefix;
-
-        /* stash message for portals callbacks; they'll NULL
-         * rac_rxmsg if they consume it */
-        LASSERT (conn->rac_rxmsg == NULL);
-        conn->rac_rxmsg = msg;
-
-        if (msg->ram_magic != RANAL_MSG_MAGIC) {
-                if (__swab32(msg->ram_magic) != RANAL_MSG_MAGIC) {
-                        CERROR("Unexpected magic %08x from %s\n",
-                               msg->ram_magic, libcfs_nid2str(peer->rap_nid));
-                        rc = -EPROTO;
-                        goto out;
-                }
-
-                __swab32s(&msg->ram_magic);
-                __swab16s(&msg->ram_version);
-                __swab16s(&msg->ram_type);
-                __swab64s(&msg->ram_srcnid);
-                __swab64s(&msg->ram_connstamp);
-                __swab32s(&msg->ram_seq);
-
-                /* NB message type checked below; NOT here... */
-                switch (msg->ram_type) {
-                case RANAL_MSG_PUT_ACK:
-                        kranal_swab_rdma_desc(&msg->ram_u.putack.rapam_desc);
-                        break;
-
-                case RANAL_MSG_GET_REQ:
-                        kranal_swab_rdma_desc(&msg->ram_u.get.ragm_desc);
-                        break;
-
-                default:
-                        break;
-                }
-        }
-
-        if (msg->ram_version != RANAL_MSG_VERSION) {
-                CERROR("Unexpected protocol version %d from %s\n",
-                       msg->ram_version, libcfs_nid2str(peer->rap_nid));
-                rc = -EPROTO;
-                goto out;
-        }
-
-        if (msg->ram_srcnid != peer->rap_nid) {
-                CERROR("Unexpected peer %s from %s\n",
-                       libcfs_nid2str(msg->ram_srcnid), 
-                       libcfs_nid2str(peer->rap_nid));
-                rc = -EPROTO;
-                goto out;
-        }
-
-        if (msg->ram_connstamp != conn->rac_peer_connstamp) {
-                CERROR("Unexpected connstamp "LPX64"("LPX64
-                       " expected) from %s\n",
-                       msg->ram_connstamp, conn->rac_peer_connstamp,
-                       libcfs_nid2str(peer->rap_nid));
-                rc = -EPROTO;
-                goto out;
-        }
-
-        if (msg->ram_seq != seq) {
-                CERROR("Unexpected sequence number %d(%d expected) from %s\n",
-                       msg->ram_seq, seq, libcfs_nid2str(peer->rap_nid));
-                rc = -EPROTO;
-                goto out;
-        }
-
-        if ((msg->ram_type & RANAL_MSG_FENCE) != 0) {
-                /* This message signals RDMA completion... */
-                rrc = RapkFmaSyncWait(conn->rac_rihandle);
-                if (rrc != RAP_SUCCESS) {
-                        CERROR("RapkFmaSyncWait failed: %d\n", rrc);
-                        rc = -ENETDOWN;
-                        goto out;
-                }
-        }
-
-        if (conn->rac_close_recvd) {
-                CERROR("Unexpected message %d after CLOSE from %s\n",
-                       msg->ram_type, libcfs_nid2str(conn->rac_peer->rap_nid));
-                rc = -EPROTO;
-                goto out;
-        }
-
-        if (msg->ram_type == RANAL_MSG_CLOSE) {
-                CWARN("RX CLOSE from %s\n", libcfs_nid2str(conn->rac_peer->rap_nid));
-                conn->rac_close_recvd = 1;
-               write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                if (conn->rac_state == RANAL_CONN_ESTABLISHED)
-                        kranal_close_conn_locked(conn, 0);
-                else if (conn->rac_state == RANAL_CONN_CLOSING &&
-                         conn->rac_close_sent)
-                        kranal_terminate_conn_locked(conn);
-
-               write_unlock_irqrestore(&kranal_data.kra_global_lock,
-                                            flags);
-                goto out;
-        }
-
-        if (conn->rac_state != RANAL_CONN_ESTABLISHED)
-                goto out;
-
-        switch (msg->ram_type) {
-        case RANAL_MSG_NOOP:
-                /* Nothing to do; just a keepalive */
-                CDEBUG(D_NET, "RX NOOP on %p\n", conn);
-                break;
-
-        case RANAL_MSG_IMMEDIATE:
-                CDEBUG(D_NET, "RX IMMEDIATE on %p\n", conn);
-                rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.immediate.raim_hdr, 
-                                msg->ram_srcnid, conn, 0);
-                repost = rc < 0;
-                break;
-
-        case RANAL_MSG_PUT_REQ:
-                CDEBUG(D_NET, "RX PUT_REQ on %p\n", conn);
-                rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.putreq.raprm_hdr, 
-                                msg->ram_srcnid, conn, 1);
-                repost = rc < 0;
-                break;
-
-        case RANAL_MSG_PUT_NAK:
-                CDEBUG(D_NET, "RX PUT_NAK on %p\n", conn);
-                tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, -ENOENT);    /* no match */
-                break;
-
-        case RANAL_MSG_PUT_ACK:
-                CDEBUG(D_NET, "RX PUT_ACK on %p\n", conn);
-                tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ,
-                                        msg->ram_u.putack.rapam_src_cookie);
-                if (tx == NULL)
-                        break;
-
-                kranal_rdma(tx, RANAL_MSG_PUT_DONE,
-                            &msg->ram_u.putack.rapam_desc,
-                            msg->ram_u.putack.rapam_desc.rard_nob,
-                            msg->ram_u.putack.rapam_dst_cookie);
-                break;
-
-        case RANAL_MSG_PUT_DONE:
-                CDEBUG(D_NET, "RX PUT_DONE on %p\n", conn);
-                tx = kranal_match_reply(conn, RANAL_MSG_PUT_ACK,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, 0);
-                break;
-
-        case RANAL_MSG_GET_REQ:
-                CDEBUG(D_NET, "RX GET_REQ on %p\n", conn);
-                rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.get.ragm_hdr, 
-                                msg->ram_srcnid, conn, 1);
-                repost = rc < 0;
-                break;
-
-        case RANAL_MSG_GET_NAK:
-                CDEBUG(D_NET, "RX GET_NAK on %p\n", conn);
-                tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, -ENOENT);    /* no match */
-                break;
-
-        case RANAL_MSG_GET_DONE:
-                CDEBUG(D_NET, "RX GET_DONE on %p\n", conn);
-                tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-#if 0
-                /* completion message should send rdma length if we ever allow
-                 * GET truncation */
-                lnet_set_reply_msg_len(kranal_data.kra_ni, tx->tx_lntmsg[1], ???);
-#endif
-                kranal_tx_done(tx, 0);
-                break;
-        }
-
- out:
-        if (rc < 0)                             /* protocol/comms error */
-                kranal_close_conn (conn, rc);
-
-        if (repost && conn->rac_rxmsg != NULL)
-                kranal_consume_rxmsg(conn, NULL, 0);
-
-        /* check again later */
-        kranal_schedule_conn(conn);
-}
-
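The receive path above detects a peer with opposite byte order by checking the message magic: if it does not match as-is but does match after a 32-bit byte swap, every multi-byte header field (and any RDMA descriptor the message carries) is swabbed before use. A self-contained sketch of that check is below; the magic value is made up, the driver's real RANAL_MSG_MAGIC lives in its headers.

#include <stdint.h>
#include <stdio.h>

#define MSG_MAGIC 0x0be91b92u           /* made-up value, not RANAL_MSG_MAGIC */

static uint32_t swab32(uint32_t v)
{
        return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
               ((v << 8) & 0x00ff0000u) | (v << 24);
}

/* 0 = same byte order, 1 = peer is opposite-endian (swab every field),
 * -1 = not our protocol at all */
static int check_magic(uint32_t magic)
{
        if (magic == MSG_MAGIC)
                return 0;
        if (swab32(magic) == MSG_MAGIC)
                return 1;
        return -1;
}

int main(void)
{
        printf("%d %d %d\n", check_magic(MSG_MAGIC),
               check_magic(swab32(MSG_MAGIC)), check_magic(0xdeadbeefu));
        return 0;
}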
-void
-kranal_complete_closed_conn (kra_conn_t *conn)
-{
-        kra_tx_t   *tx;
-        int         nfma;
-        int         nreplies;
-
-        LASSERT (conn->rac_state == RANAL_CONN_CLOSED);
-        LASSERT (cfs_list_empty(&conn->rac_list));
-        LASSERT (cfs_list_empty(&conn->rac_hashlist));
-
-        for (nfma = 0; !cfs_list_empty(&conn->rac_fmaq); nfma++) {
-                tx = cfs_list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
-
-                cfs_list_del(&tx->tx_list);
-                kranal_tx_done(tx, -ECONNABORTED);
-        }
-
-        LASSERT (cfs_list_empty(&conn->rac_rdmaq));
-
-        for (nreplies = 0; !cfs_list_empty(&conn->rac_replyq); nreplies++) {
-                tx = cfs_list_entry(conn->rac_replyq.next, kra_tx_t, tx_list);
-
-                cfs_list_del(&tx->tx_list);
-                kranal_tx_done(tx, -ECONNABORTED);
-        }
-
-        CWARN("Closed conn %p -> %s: nmsg %d nreplies %d\n",
-               conn, libcfs_nid2str(conn->rac_peer->rap_nid), nfma, nreplies);
-}
-
-int kranal_process_new_conn (kra_conn_t *conn)
-{
-       RAP_RETURN   rrc;
-
-       rrc = RapkCompleteSync(conn->rac_rihandle, 1);
-       if (rrc == RAP_SUCCESS)
-               return 0;
-
-       LASSERT (rrc == RAP_NOT_DONE);
-       if (!cfs_time_aftereq(jiffies, conn->rac_last_tx +
-                             msecs_to_jiffies(conn->rac_timeout*MSEC_PER_SEC)))
-               return -EAGAIN;
-
-       /* Too late */
-       rrc = RapkCompleteSync(conn->rac_rihandle, 0);
-       LASSERT (rrc == RAP_SUCCESS);
-       return -ETIMEDOUT;
-}
-
-int
-kranal_scheduler (void *arg)
-{
-       kra_device_t     *dev = (kra_device_t *)arg;
-       wait_queue_t    wait;
-       kra_conn_t       *conn;
-        unsigned long     flags;
-        unsigned long     deadline;
-        unsigned long     soonest;
-        int               nsoonest;
-        long              timeout;
-        cfs_list_t       *tmp;
-        cfs_list_t       *nxt;
-        int               rc;
-        int               dropped_lock;
-        int               busy_loops = 0;
-
-        cfs_block_allsigs();
-
-       dev->rad_scheduler = current;
-       init_waitqueue_entry_current(&wait);
-
-       spin_lock_irqsave(&dev->rad_lock, flags);
-
-        while (!kranal_data.kra_shutdown) {
-                /* Safe: kra_shutdown only set when quiescent */
-
-                if (busy_loops++ >= RANAL_RESCHED) {
-                       spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-                       cond_resched();
-                       busy_loops = 0;
-
-                       spin_lock_irqsave(&dev->rad_lock, flags);
-                }
-
-                dropped_lock = 0;
-
-                if (dev->rad_ready) {
-                        /* Device callback fired since I last checked it */
-                        dev->rad_ready = 0;
-                       spin_unlock_irqrestore(&dev->rad_lock, flags);
-                        dropped_lock = 1;
-
-                        kranal_check_rdma_cq(dev);
-                        kranal_check_fma_cq(dev);
-
-                       spin_lock_irqsave(&dev->rad_lock, flags);
-                }
-
-                cfs_list_for_each_safe(tmp, nxt, &dev->rad_ready_conns) {
-                        conn = cfs_list_entry(tmp, kra_conn_t, rac_schedlist);
-
-                        cfs_list_del_init(&conn->rac_schedlist);
-                        LASSERT (conn->rac_scheduled);
-                        conn->rac_scheduled = 0;
-                       spin_unlock_irqrestore(&dev->rad_lock, flags);
-                        dropped_lock = 1;
-
-                        kranal_check_fma_rx(conn);
-                        kranal_process_fmaq(conn);
-
-                        if (conn->rac_state == RANAL_CONN_CLOSED)
-                                kranal_complete_closed_conn(conn);
-
-                        kranal_conn_decref(conn);
-                       spin_lock_irqsave(&dev->rad_lock, flags);
-                }
-
-                nsoonest = 0;
-                soonest = jiffies;
-
-                cfs_list_for_each_safe(tmp, nxt, &dev->rad_new_conns) {
-                        conn = cfs_list_entry(tmp, kra_conn_t, rac_schedlist);
-
-                        deadline = conn->rac_last_tx + conn->rac_keepalive;
-                        if (cfs_time_aftereq(jiffies, deadline)) {
-                                /* Time to process this new conn */
-                               spin_unlock_irqrestore(&dev->rad_lock,
-                                                           flags);
-                                dropped_lock = 1;
-
-                                rc = kranal_process_new_conn(conn);
-                                if (rc != -EAGAIN) {
-                                        /* All done with this conn */
-                                       spin_lock_irqsave(&dev->rad_lock,
-                                                              flags);
-                                        cfs_list_del_init(&conn->rac_schedlist);
-                                       spin_unlock_irqrestore(&dev-> \
-                                                                   rad_lock,
-                                                                   flags);
-
-                                        kranal_conn_decref(conn);
-                                       spin_lock_irqsave(&dev->rad_lock,
-                                                              flags);
-                                        continue;
-                                }
-
-                               /* retry with exponential backoff until HZ */
-                               if (conn->rac_keepalive == 0)
-                                       conn->rac_keepalive = 1;
-                               else if (conn->rac_keepalive <=
-                                        msecs_to_jiffies(MSEC_PER_SEC))
-                                       conn->rac_keepalive *= 2;
-                               else
-                                       conn->rac_keepalive +=
-                                               msecs_to_jiffies(MSEC_PER_SEC);
-
-                               deadline = conn->rac_last_tx + conn->rac_keepalive;
-                               spin_lock_irqsave(&dev->rad_lock, flags);
-                        }
-
-                        /* Does this conn need attention soonest? */
-                        if (nsoonest++ == 0 ||
-                            !cfs_time_aftereq(deadline, soonest))
-                                soonest = deadline;
-                }
-
-                if (dropped_lock)               /* may sleep iff I didn't drop the lock */
-                        continue;
-
-               set_current_state(TASK_INTERRUPTIBLE);
-               add_wait_queue_exclusive(&dev->rad_waitq, &wait);
-               spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-               if (nsoonest == 0) {
-                       busy_loops = 0;
-                       waitq_wait(&wait, TASK_INTERRUPTIBLE);
-               } else {
-                       timeout = (long)(soonest - jiffies);
-                       if (timeout > 0) {
-                               busy_loops = 0;
-                               waitq_timedwait(&wait,
-                                                   TASK_INTERRUPTIBLE,
-                                                   timeout);
-                       }
-               }
-
-               remove_wait_queue(&dev->rad_waitq, &wait);
-               set_current_state(TASK_RUNNING);
-               spin_lock_irqsave(&dev->rad_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-        dev->rad_scheduler = NULL;
-        kranal_thread_fini();
-        return 0;
-}
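
The scheduler deleted above parks itself with the old libcfs wait-queue wrappers (init_waitqueue_entry_current(), waitq_wait(), waitq_timedwait()). As a rough sketch only, assuming the usual mapping of those wrappers onto the stock kernel primitives and reusing the identifiers from the removed function, the idle path corresponds to:

/* Sketch only, not code from this tree: the presumed stock-kernel
 * equivalent of the libcfs wait helpers used by kranal_scheduler(). */
wait_queue_t wait;

init_waitqueue_entry(&wait, current);

set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue_exclusive(&dev->rad_waitq, &wait);
spin_unlock_irqrestore(&dev->rad_lock, flags);

if (nsoonest == 0)
	schedule();                      /* waitq_wait() */
else if (timeout > 0)
	schedule_timeout(timeout);       /* waitq_timedwait() */

remove_wait_queue(&dev->rad_waitq, &wait);
set_current_state(TASK_RUNNING);
spin_lock_irqsave(&dev->rad_lock, flags);
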
diff --git a/lnet/klnds/ralnd/ralnd_modparams.c b/lnet/klnds/ralnd/ralnd_modparams.c
deleted file mode 100644
index 2d5e64f..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ralnd/ralnd_modparams.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "ralnd.h"
-
-static int n_connd = 4;
-CFS_MODULE_PARM(n_connd, "i", int, 0444,
-                "# of connection daemons");
-
-static int min_reconnect_interval = 1;
-CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
-                "minimum connection retry interval (seconds)");
-
-static int max_reconnect_interval = 60;
-CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
-                "maximum connection retry interval (seconds)");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
-                "# of transmit descriptors");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
-                "# concurrent sends");
-
-static int peer_credits = 32;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
-                "# concurrent sends to 1 peer");
-
-static int fma_cq_size = 8192;
-CFS_MODULE_PARM(fma_cq_size, "i", int, 0444,
-                "size of the completion queue");
-
-static int timeout = 30;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
-                "communications timeout (seconds)");
-
-static int max_immediate = (2<<10);
-CFS_MODULE_PARM(max_immediate, "i", int, 0644,
-                "immediate/RDMA breakpoint");
-
-kra_tunables_t kranal_tunables = {
-        .kra_n_connd                = &n_connd,
-        .kra_min_reconnect_interval = &min_reconnect_interval,
-        .kra_max_reconnect_interval = &max_reconnect_interval,
-        .kra_ntx                    = &ntx,
-        .kra_credits                = &credits,
-        .kra_peercredits            = &peer_credits,
-        .kra_fma_cq_size            = &fma_cq_size,
-        .kra_timeout                = &timeout,
-        .kra_max_immediate          = &max_immediate,
-};
-
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-
-static struct ctl_table kranal_ctl_table[] = {
-       {
-               INIT_CTL_NAME
-               .procname       = "n_connd",
-               .data           = &n_connd,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "min_reconnect_interval",
-               .data           = &min_reconnect_interval,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "max_reconnect_interval",
-               .data           = &max_reconnect_interval,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "ntx",
-               .data           = &ntx,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "credits",
-               .data           = &credits,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "peer_credits",
-               .data           = &peer_credits,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "fma_cq_size",
-               .data           = &fma_cq_size,
-               .maxlen         = sizeof(int),
-               .mode           = 0444,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "timeout",
-               .data           = &timeout,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-       {
-               INIT_CTL_NAME
-               .procname       = "max_immediate",
-               .data           = &max_immediate,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec
-       },
-       { 0 }
-};
-
-static struct ctl_table kranal_top_ctl_table[] = {
-       {
-               INIT_CTL_NAME
-               .procname       = "ranal",
-               .data           = NULL,
-               .maxlen         = 0,
-               .mode           = 0555,
-               .child          = kranal_ctl_table
-       },
-       { 0 }
-};
-
-int
-kranal_tunables_init ()
-{
-       kranal_tunables.kra_sysctl =
-               register_sysctl_table(kranal_top_ctl_table);
-
-       if (kranal_tunables.kra_sysctl == NULL)
-               CWARN("Can't setup /proc tunables\n");
-
-       return 0;
-}
-
-void kranal_tunables_fini()
-{
-       if (kranal_tunables.kra_sysctl != NULL)
-               unregister_sysctl_table(kranal_tunables.kra_sysctl);
-}
-
-#else
-
-int
-kranal_tunables_init ()
-{
-       return 0;
-}
-
-void
-kranal_tunables_fini ()
-{
-}
-
-#endif
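
The CFS_MODULE_PARM() declarations and the sysctl table removed above are libcfs-era plumbing for module tunables. Assuming the usual expansion of that wrapper onto the standard kernel macros, two of the tunables would read roughly like this (an illustrative sketch, not code from this tree):

#include <linux/module.h>
#include <linux/moduleparam.h>

/* Sketch: plain module_param()/MODULE_PARM_DESC() equivalents of the
 * CFS_MODULE_PARM() declarations deleted above (expansion assumed). */
static int n_connd = 4;
module_param(n_connd, int, 0444);
MODULE_PARM_DESC(n_connd, "# of connection daemons");

static int timeout = 30;
module_param(timeout, int, 0644);
MODULE_PARM_DESC(timeout, "communications timeout (seconds)");
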
diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c
index 4eb953b..a40f17b 100644
@@ -1245,83 +1245,3 @@ lnet_parse_ip2nets (char **networksp, char *ip2nets)
 
         return 0;
 }
-
-int
-lnet_set_ip_niaddr (lnet_ni_t *ni)
-{
-        __u32  net = LNET_NIDNET(ni->ni_nid);
-        char **names;
-        int    n;
-        __u32  ip;
-        __u32  netmask;
-        int    up;
-        int    i;
-        int    rc;
-
-        /* Convenience for LNDs that use the IP address of a local interface as
-         * the local address part of their NID */
-
-        if (ni->ni_interfaces[0] != NULL) {
-
-                CLASSERT (LNET_MAX_INTERFACES > 1);
-
-                if (ni->ni_interfaces[1] != NULL) {
-                        CERROR("Net %s doesn't support multiple interfaces\n",
-                               libcfs_net2str(net));
-                        return -EPERM;
-                }
-
-                rc = libcfs_ipif_query(ni->ni_interfaces[0],
-                                       &up, &ip, &netmask);
-                if (rc != 0) {
-                        CERROR("Net %s can't query interface %s: %d\n",
-                               libcfs_net2str(net), ni->ni_interfaces[0], rc);
-                        return -EPERM;
-                }
-
-                if (!up) {
-                        CERROR("Net %s can't use interface %s: it's down\n",
-                               libcfs_net2str(net), ni->ni_interfaces[0]);
-                        return -ENETDOWN;
-                }
-
-                ni->ni_nid = LNET_MKNID(net, ip);
-                return 0;
-        }
-
-        n = libcfs_ipif_enumerate(&names);
-        if (n <= 0) {
-                CERROR("Net %s can't enumerate interfaces: %d\n",
-                       libcfs_net2str(net), n);
-                return 0;
-        }
-
-        for (i = 0; i < n; i++) {
-                if (!strcmp(names[i], "lo")) /* skip the loopback IF */
-                        continue;
-
-                rc = libcfs_ipif_query(names[i], &up, &ip, &netmask);
-
-                if (rc != 0) {
-                        CWARN("Net %s can't query interface %s: %d\n",
-                              libcfs_net2str(net), names[i], rc);
-                        continue;
-                }
-
-                if (!up) {
-                        CWARN("Net %s ignoring interface %s (down)\n",
-                              libcfs_net2str(net), names[i]);
-                        continue;
-                }
-
-                libcfs_ipif_free_enumeration(names, n);
-                ni->ni_nid = LNET_MKNID(net, ip);
-                return 0;
-        }
-
-        CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net));
-        libcfs_ipif_free_enumeration(names, n);
-        return -ENOENT;
-}
-EXPORT_SYMBOL(lnet_set_ip_niaddr);
-
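
The removed lnet_set_ip_niaddr() was, per its own comment, a convenience for LNDs that use the IP address of a local interface as the address part of their NID. For context, the NID packing behind the LNET_MKNID()/LNET_NIDNET() calls above is roughly the following (a sketch shown for orientation; the exact header definitions are assumed):

/* Sketch of the lnet_nid_t layout assumed by the deleted helper:
 * the high 32 bits identify the network (LND type + network number),
 * the low 32 bits hold the per-network address, here an IPv4 address. */
typedef unsigned long long lnet_nid_t;

#define LNET_MKNID(net, addr)   ((((lnet_nid_t)(net)) << 32) | (addr))
#define LNET_NIDNET(nid)        ((unsigned int)((nid) >> 32))
#define LNET_NIDADDR(nid)       ((unsigned int)((nid) & 0xffffffff))

So the ni->ni_nid = LNET_MKNID(net, ip) assignments in the deleted code simply splice the interface's IPv4 address into the low word of the NID.
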
diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c
index 1954a31..3f11c75 100644
@@ -774,13 +774,9 @@ static struct mod_paths {
 } mod_paths[] = {
        { "libcfs", "libcfs/libcfs" },
        { "lnet", "lnet/lnet" },
-       { "kmxlnd", "lnet/klnds/mxlnd" },
        { "ko2iblnd", "lnet/klnds/o2iblnd" },
        { "kgnilnd", "lnet/klnds/gnilnd"},
-       { "kqswlnd", "lnet/klnds/qswlnd" },
-       { "kralnd", "lnet/klnds/ralnd" },
        { "ksocklnd", "lnet/klnds/socklnd" },
-       { "ktdilnd", "lnet/klnds/tdilnd" },
        { "obdclass", "lustre/obdclass" },
        { "llog_test", "lustre/obdclass" },
        { "ptlrpc_gss", "lustre/ptlrpc/gss" },
diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c
index f1e34f0..c8e67c8 100644
@@ -574,8 +574,7 @@ jt_ptl_print_peers (int argc, char **argv)
         int                      index;
         int                      rc;
 
-       if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND,
-                                 O2IBLND, GNILND, 0))
+       if (!g_net_is_compatible (argv[0], SOCKLND, O2IBLND, GNILND, 0))
                 return -1;
 
         for (index = 0;;index++) {
@@ -591,7 +590,7 @@ jt_ptl_print_peers (int argc, char **argv)
                         id.nid = data.ioc_nid;
                         id.pid = data.ioc_u32[4];
                         printf ("%-20s [%d]%s->%s:%d #%d\n",
-                                libcfs_id2str(id), 
+                               libcfs_id2str(id),
                                 data.ioc_count, /* persistence */
                                /* my ip */
                                ptl_ipaddr_2_str(data.ioc_u32[2], buffer[0],
@@ -601,14 +600,6 @@ jt_ptl_print_peers (int argc, char **argv)
                                                 sizeof(buffer[1]), 1),
                                 data.ioc_u32[1], /* peer port */
                                 data.ioc_u32[3]); /* conn_count */
-                } else if (g_net_is_compatible(NULL, RALND, 0)) {
-                        printf ("%-20s [%d]@%s:%d\n",
-                                libcfs_nid2str(data.ioc_nid), /* peer nid */
-                                data.ioc_count,   /* peer persistence */
-                               /* peer ip */
-                               ptl_ipaddr_2_str(data.ioc_u32[0], buffer[1],
-                                                sizeof(buffer[1]), 1),
-                                data.ioc_u32[1]); /* peer port */
                } else if (g_net_is_compatible(NULL, GNILND, 0)) {
                        int disconn = data.ioc_flags >> 16;
                        char *state;
@@ -656,8 +647,7 @@ jt_ptl_add_peer (int argc, char **argv)
         int                      port = 0;
         int                      rc;
 
-       if (!g_net_is_compatible (argv[0], SOCKLND, RALND,
-                                 GNILND, 0))
+       if (!g_net_is_compatible(argv[0], SOCKLND, GNILND, 0))
                 return -1;
 
         if (argc != 4) {
@@ -707,8 +697,7 @@ jt_ptl_del_peer (int argc, char **argv)
         __u32                    ip = 0;
         int                      rc;
 
-       if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND,
-                                 O2IBLND, GNILND, 0))
+       if (!g_net_is_compatible(argv[0], SOCKLND, O2IBLND, GNILND, 0))
                 return -1;
 
         if (g_net_is_compatible(NULL, SOCKLND, 0)) {
@@ -762,8 +751,7 @@ jt_ptl_print_connections (int argc, char **argv)
         int                      index;
         int                      rc;
 
-       if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, O2IBLND,
-                                 GNILND, 0))
+       if (!g_net_is_compatible(argv[0], SOCKLND, O2IBLND, GNILND, 0))
                 return -1;
 
         for (index = 0; ; index++) {
@@ -795,10 +783,6 @@ jt_ptl_print_connections (int argc, char **argv)
                                 data.ioc_count, /* tx buffer size */
                                 data.ioc_u32[5], /* rx buffer size */
                                 data.ioc_flags ? "nagle" : "nonagle");
-                } else if (g_net_is_compatible (NULL, RALND, 0)) {
-                        printf ("%-20s [%d]\n",
-                                libcfs_nid2str(data.ioc_nid),
-                                data.ioc_u32[0] /* device id */);
                 } else if (g_net_is_compatible (NULL, O2IBLND, 0)) {
                         printf ("%s mtu %d\n",
                                 libcfs_nid2str(data.ioc_nid),
@@ -836,8 +820,7 @@ int jt_ptl_disconnect(int argc, char **argv)
                 return 0;
         }
 
-       if (!g_net_is_compatible (NULL, SOCKLND, RALND, MXLND, O2IBLND,
-                                 GNILND, 0))
+       if (!g_net_is_compatible(NULL, SOCKLND, O2IBLND, GNILND, 0))
                 return 0;
 
         if (argc >= 2 &&
@@ -902,49 +885,6 @@ int jt_ptl_push_connection (int argc, char **argv)
         return 0;
 }
 
-int
-jt_ptl_print_active_txs (int argc, char **argv)
-{
-        struct libcfs_ioctl_data data;
-        int                      index;
-        int                      rc;
-
-        if (!g_net_is_compatible (argv[0], QSWLND, 0))
-                return -1;
-
-        for (index = 0;;index++) {
-                LIBCFS_IOC_INIT(data);
-                data.ioc_net   = g_net;
-                data.ioc_count = index;
-
-                rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_TXDESC, &data);
-                if (rc != 0)
-                        break;
-
-                printf ("type %u payload %6d to %s via %s by pid %6d: "
-                        "%s, %s, state %d\n",
-                        data.ioc_u32[0],
-                        data.ioc_count,
-                        libcfs_nid2str(data.ioc_nid),
-                        libcfs_nid2str(data.ioc_u64[0]),
-                        data.ioc_u32[1],
-                        (data.ioc_flags & 1) ? "delayed" : "immediate",
-                        (data.ioc_flags & 2) ? "nblk"    : "normal",
-                        data.ioc_flags >> 2);
-        }
-
-        if (index == 0) {
-                if (errno == ENOENT) {
-                        printf ("<no active descs>\n");
-                } else {
-                        fprintf(stderr, "Error getting active transmits list: "
-                                "%s: check dmesg.\n",
-                                strerror(errno));
-                }
-        }
-        return 0;
-}
-
 int jt_ptl_ping(int argc, char **argv)
 {
         int                      rc;
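
The deleted active_tx handler above follows the same enumerate-by-index ioctl idiom that the surviving peer_list and conn_list commands keep using. Stripped to its core (identifiers taken from the code above, with IOC_LIBCFS_GET_PEER standing in as the example request), the loop looks like:

/* Sketch of the lctl enumeration idiom: request entry 0, 1, 2, ... until the
 * ioctl fails, then use errno to tell "list exhausted" (ENOENT) from a real
 * error. */
for (index = 0; ; index++) {
	LIBCFS_IOC_INIT(data);
	data.ioc_net   = g_net;
	data.ioc_count = index;

	rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER, &data);
	if (rc != 0)
		break;

	/* format one entry from the returned data.ioc_* fields here */
}
if (index == 0 && errno != ENOENT)
	fprintf(stderr, "enumeration failed: %s\n", strerror(errno));
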
diff --git a/lustre/doc/lctl.8 b/lustre/doc/lctl.8
index ed79e6a..c3a3379 100644
@@ -84,9 +84,6 @@ type.
 Print all the connected remote NIDs for a given
 .B network
 type.
-.TP
-.BI active_tx 
-This command should print active transmits, and it is only used for elan network type.
 .TP 
 .BI route_list 
 Print the complete routing table.
diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c
index 6de68b8..c8351d8 100644
@@ -102,8 +102,6 @@ command_t cmdlist[] = {
         {"conn_list", jt_ptl_print_connections, 0,
          "print all the connected remote nid\n"
          "usage: conn_list"},
-        {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits\n"
-         "usage: active_tx"},
         {"route_list", jt_ptl_print_routes, 0,
          "print the portals routing table, same as show_route\n"
          "usage: route_list"},