X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fmxlnd%2Fmxlnd.c;h=fb80d3e4e68793485a4cfb060b2df6f47f9a30a7;hb=362f4b5dd80d45c084d518eb7f037f4896cf4311;hp=34dbf5e8931c6b71f9762d9ad391798cd4c70107;hpb=2371e317a0384e5a57049d08f67d5be06970b1ee;p=fs%2Flustre-release.git

diff --git a/lnet/klnds/mxlnd/mxlnd.c b/lnet/klnds/mxlnd/mxlnd.c
index 34dbf5e..fb80d3e 100644
--- a/lnet/klnds/mxlnd/mxlnd.c
+++ b/lnet/klnds/mxlnd/mxlnd.c
@@ -1,25 +1,44 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- * Copyright (C) 2006 Myricom, Inc.
- *   Author: Scott Atchley <atchley at myri.com>
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * Copyright (C) 2006 Myricom, Inc.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * lnet/klnds/mxlnd/mxlnd.c
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ * Author: Scott Atchley <atchley at myri.com>
  */
 
 #include "mxlnd.h"
@@ -35,76 +54,57 @@ lnd_t the_kmxlnd = {
 
 kmx_data_t               kmxlnd_data;
 
-/**
- * mxlnd_ctx_free - free ctx struct
- * @ctx - a kmx_peer pointer
- *
- * The calling function should remove the ctx from the ctx list first
- * then free it.
- */
 void
-mxlnd_ctx_free(struct kmx_ctx *ctx)
+mxlnd_free_pages(kmx_pages_t *p)
 {
-        if (ctx == NULL) return;
+        int     npages = p->mxg_npages;
+        int     i;
 
-        if (ctx->mxc_page != NULL) {
-                __free_page(ctx->mxc_page);
-                spin_lock(&kmxlnd_data.kmx_global_lock);
-                kmxlnd_data.kmx_mem_used -= MXLND_EAGER_SIZE;
-                spin_unlock(&kmxlnd_data.kmx_global_lock);
-        }
+        CDEBUG(D_MALLOC, "freeing %d pages\n", npages);
 
-        if (ctx->mxc_seg_list != NULL) {
-                LASSERT(ctx->mxc_nseg > 0);
-                MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t));
+        for (i = 0; i < npages; i++) {
+                if (p->mxg_pages[i] != NULL) {
+                        __free_page(p->mxg_pages[i]);
+                        cfs_spin_lock(&kmxlnd_data.kmx_mem_lock);
+                        kmxlnd_data.kmx_mem_used -= PAGE_SIZE;
+                        cfs_spin_unlock(&kmxlnd_data.kmx_mem_lock);
+                }
         }
 
-        MXLND_FREE (ctx, sizeof (*ctx));
-        return;
+        MXLND_FREE(p, offsetof(kmx_pages_t, mxg_pages[npages]));
 }
 
-/**
- * mxlnd_ctx_alloc - allocate and initialize a new ctx struct
- * @ctxp - address of a kmx_ctx pointer
- *
- * Returns 0 on success and -EINVAL, -ENOMEM on failure
- */
 int
-mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type)
+mxlnd_alloc_pages(kmx_pages_t **pp, int npages)
 {
-        int             ret     = 0;
-        struct kmx_ctx  *ctx    = NULL;
+        kmx_pages_t    *p       = NULL;
+        int             i       = 0;
 
-        if (ctxp == NULL) return -EINVAL;
+        CDEBUG(D_MALLOC, "allocing %d pages\n", npages);
 
-        MXLND_ALLOC(ctx, sizeof (*ctx));
-        if (ctx == NULL) {
-                CDEBUG(D_NETERROR, "Cannot allocate ctx\n");
+        MXLND_ALLOC(p, offsetof(kmx_pages_t, mxg_pages[npages]));
+        if (p == NULL) {
+                CERROR("Can't allocate descriptor for %d pages\n", npages);
                 return -ENOMEM;
         }
-        memset(ctx, 0, sizeof(*ctx));
-        spin_lock_init(&ctx->mxc_lock);
-
-        ctx->mxc_type = type;
-        ctx->mxc_page = alloc_page (GFP_KERNEL);
-        if (ctx->mxc_page == NULL) {
-                CDEBUG(D_NETERROR, "Can't allocate page\n");
-                ret = -ENOMEM;
-                goto failed;
+
+        memset(p, 0, offsetof(kmx_pages_t, mxg_pages[npages]));
+        p->mxg_npages = npages;
+
+        for (i = 0; i < npages; i++) {
+                p->mxg_pages[i] = alloc_page(GFP_KERNEL);
+                if (p->mxg_pages[i] == NULL) {
+                        CERROR("Can't allocate page %d of %d\n", i, npages);
+                        mxlnd_free_pages(p);
+                        return -ENOMEM;
+                }
+                cfs_spin_lock(&kmxlnd_data.kmx_mem_lock);
+                kmxlnd_data.kmx_mem_used += PAGE_SIZE;
+                cfs_spin_unlock(&kmxlnd_data.kmx_mem_lock);
         }
-        spin_lock(&kmxlnd_data.kmx_global_lock);
-        kmxlnd_data.kmx_mem_used += MXLND_EAGER_SIZE;
-        spin_unlock(&kmxlnd_data.kmx_global_lock);
-        ctx->mxc_msg = (struct kmx_msg *)((char *)page_address(ctx->mxc_page));
-        ctx->mxc_seg.segment_ptr = MX_PA_TO_U64(lnet_page2phys(ctx->mxc_page));
-        ctx->mxc_state = MXLND_CTX_IDLE;
 
-        *ctxp = ctx;
+        *pp = p;
         return 0;
-
-failed:
-        mxlnd_ctx_free(ctx);
-        return ret;
 }
 
 /**
@@ -112,7 +112,7 @@ failed:
  * @ctx - a kmx_ctx pointer
  */
 void
-mxlnd_ctx_init(struct kmx_ctx *ctx)
+mxlnd_ctx_init(kmx_ctx_t *ctx)
 {
         if (ctx == NULL) return;
 
@@ -120,27 +120,21 @@ mxlnd_ctx_init(struct kmx_ctx *ctx)
         ctx->mxc_incarnation = 0;
         ctx->mxc_deadline = 0;
         ctx->mxc_state = MXLND_CTX_IDLE;
-        /* ignore mxc_global_list */
-        if (ctx->mxc_list.next != NULL && !list_empty(&ctx->mxc_list)) {
-                if (ctx->mxc_peer != NULL)
-                        spin_lock(&ctx->mxc_lock);
-                list_del_init(&ctx->mxc_list);
-                if (ctx->mxc_peer != NULL)
-                        spin_unlock(&ctx->mxc_lock);
-        }
+        if (!cfs_list_empty(&ctx->mxc_list))
+                cfs_list_del_init(&ctx->mxc_list);
         /* ignore mxc_rx_list */
-        /* ignore mxc_lock */
-        ctx->mxc_nid = 0;
-        ctx->mxc_peer = NULL;
-        ctx->mxc_conn = NULL;
+        if (ctx->mxc_type == MXLND_REQ_TX) {
+                ctx->mxc_nid = 0;
+                ctx->mxc_peer = NULL;
+                ctx->mxc_conn = NULL;
+        }
         /* ignore mxc_msg */
-        /* ignore mxc_page */
         ctx->mxc_lntmsg[0] = NULL;
         ctx->mxc_lntmsg[1] = NULL;
         ctx->mxc_msg_type = 0;
         ctx->mxc_cookie = 0LL;
         ctx->mxc_match = 0LL;
-        /* ctx->mxc_seg.segment_ptr points to mxc_page */
+        /* ctx->mxc_seg.segment_ptr points to backing page */
         ctx->mxc_seg.segment_length = 0;
         if (ctx->mxc_seg_list != NULL) {
                 LASSERT(ctx->mxc_nseg > 0);
@@ -149,15 +143,15 @@ mxlnd_ctx_init(struct kmx_ctx *ctx)
         ctx->mxc_seg_list = NULL;
         ctx->mxc_nseg = 0;
         ctx->mxc_nob = 0;
-        ctx->mxc_mxreq = NULL;
+        memset(&ctx->mxc_mxreq, 0, sizeof(mx_request_t));
         memset(&ctx->mxc_status, 0, sizeof(mx_status_t));
+        ctx->mxc_errno = 0;
         /* ctx->mxc_get */
         /* ctx->mxc_put */
 
         ctx->mxc_msg->mxm_type = 0;
         ctx->mxc_msg->mxm_credits = 0;
         ctx->mxc_msg->mxm_nob = 0;
-        ctx->mxc_msg->mxm_seq = 0;
 
         return;
 }
@@ -170,13 +164,24 @@ mxlnd_ctx_init(struct kmx_ctx *ctx)
 void
 mxlnd_free_txs(void)
 {
-        struct kmx_ctx          *tx     = NULL;
-        struct kmx_ctx          *next   = NULL;
-
-        list_for_each_entry_safe(tx, next, &kmxlnd_data.kmx_txs, mxc_global_list) {
-                list_del_init(&tx->mxc_global_list);
-                mxlnd_ctx_free(tx);
+        int             i       = 0;
+        kmx_ctx_t       *tx     = NULL;
+
+        if (kmxlnd_data.kmx_tx_pages) {
+                for (i = 0; i < MXLND_TX_MSGS(); i++) {
+                        tx = &kmxlnd_data.kmx_txs[i];
+                        if (tx->mxc_seg_list != NULL) {
+                                LASSERT(tx->mxc_nseg > 0);
+                                MXLND_FREE(tx->mxc_seg_list,
+                                           tx->mxc_nseg *
+                                           sizeof(*tx->mxc_seg_list));
+                        }
+                }
+                MXLND_FREE(kmxlnd_data.kmx_txs,
+                            MXLND_TX_MSGS() * sizeof(kmx_ctx_t));
+                mxlnd_free_pages(kmxlnd_data.kmx_tx_pages);
         }
+
         return;
 }
 
@@ -191,317 +196,95 @@ mxlnd_init_txs(void)
 {
         int             ret     = 0;
         int             i       = 0;
-        struct kmx_ctx  *tx      = NULL;
+        int             ipage   = 0;
+        int             offset  = 0;
+        void           *addr    = NULL;
+        kmx_ctx_t      *tx      = NULL;
+        kmx_pages_t    *pages   = NULL;
+        struct page    *page    = NULL;
 
-        for (i = 0; i < *kmxlnd_tunables.kmx_ntx; i++) {
-                ret = mxlnd_ctx_alloc(&tx, MXLND_REQ_TX);
-                if (ret != 0) {
-                        mxlnd_free_txs();
-                        return ret;
-                }
-                mxlnd_ctx_init(tx);
-                /* in startup(), no locks required */
-                list_add_tail(&tx->mxc_global_list, &kmxlnd_data.kmx_txs);
-                list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle);
-        }
-        return 0;
-}
+        /* pre-mapped messages are not bigger than 1 page */
+        CLASSERT(MXLND_MSG_SIZE <= PAGE_SIZE);
 
-/**
- * mxlnd_free_rxs - free initial kmx_rx descriptors and associated pages
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_rxs(void)
-{
-        struct kmx_ctx          *rx     = NULL;
-        struct kmx_ctx          *next   = NULL;
+        /* No fancy arithmetic when we do the buffer calculations */
+        CLASSERT (PAGE_SIZE % MXLND_MSG_SIZE == 0);
 
-        list_for_each_entry_safe(rx, next, &kmxlnd_data.kmx_rxs, mxc_global_list) {
-                list_del_init(&rx->mxc_global_list);
-                mxlnd_ctx_free(rx);
+        ret = mxlnd_alloc_pages(&pages, MXLND_TX_MSG_PAGES());
+        if (ret != 0) {
+                CERROR("Can't allocate tx pages\n");
+                return -ENOMEM;
         }
-        return;
-}
+        kmxlnd_data.kmx_tx_pages = pages;
 
-/**
- * mxlnd_init_rxs - allocate initial rx descriptors 
- *
- * Called from startup(). We create MXLND_MAX_PEERS plus MXLND_NTX
- * rx descriptors. We create one for each potential peer to handle 
- * the initial connect request. We create on for each tx in case the 
- * send requires a non-eager receive.
- *
- * Returns 0 on success, else -ENOMEM
- */
-int
-mxlnd_init_rxs(void)
-{
-        int             ret     = 0;
-        int             i       = 0;
-        struct kmx_ctx  *rx      = NULL;
-
-        for (i = 0; i < (*kmxlnd_tunables.kmx_ntx + *kmxlnd_tunables.kmx_max_peers); i++) {
-                ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX);
-                if (ret != 0) {
-                        mxlnd_free_rxs();
-                        return ret;
-                }
-                mxlnd_ctx_init(rx);
-                /* in startup(), no locks required */
-                list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs);
-                list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle);
+        MXLND_ALLOC(kmxlnd_data.kmx_txs, MXLND_TX_MSGS() * sizeof(kmx_ctx_t));
+        if (&kmxlnd_data.kmx_txs == NULL) {
+                CERROR("Can't allocate %d tx descriptors\n", MXLND_TX_MSGS());
+                mxlnd_free_pages(pages);
+                return -ENOMEM;
         }
-        return 0;
-}
 
-/**
- * mxlnd_free_peers - free peers
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_peers(void)
-{
-        int                      i      = 0;
-        struct kmx_peer         *peer   = NULL;
-        struct kmx_peer         *next   = NULL;
+        memset(kmxlnd_data.kmx_txs, 0, MXLND_TX_MSGS() * sizeof(kmx_ctx_t));
 
-        for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                list_for_each_entry_safe(peer, next, &kmxlnd_data.kmx_peers[i], mxp_peers) {
-                        list_del_init(&peer->mxp_peers);
-                        if (peer->mxp_conn) mxlnd_conn_decref(peer->mxp_conn);
-                        mxlnd_peer_decref(peer);
-                }
-        }
-}
+        for (i = 0; i < MXLND_TX_MSGS(); i++) {
 
-int
-mxlnd_host_alloc(struct kmx_host **hostp)
-{
-        struct kmx_host *host   = NULL;
+                tx = &kmxlnd_data.kmx_txs[i];
+                tx->mxc_type = MXLND_REQ_TX;
 
-        MXLND_ALLOC(host, sizeof (*host));
-        if (host == NULL) {
-                CDEBUG(D_NETERROR, "Cannot allocate host\n");
-                return -1;
-        }
-        memset(host, 0, sizeof(*host));
-        spin_lock_init(&host->mxh_lock);
+                CFS_INIT_LIST_HEAD(&tx->mxc_list);
 
-        *hostp = host;
+                /* map mxc_msg to page */
+                page = pages->mxg_pages[ipage];
+                addr = page_address(page);
+                LASSERT(addr != NULL);
+                tx->mxc_msg = (kmx_msg_t *)(addr + offset);
+                tx->mxc_seg.segment_ptr = MX_PA_TO_U64(virt_to_phys(tx->mxc_msg));
 
-        return 0;
-}
+                mxlnd_ctx_init(tx);
 
-void
-mxlnd_host_free(struct kmx_host *host)
-{
-        if (host == NULL) return;
+                offset += MXLND_MSG_SIZE;
+                LASSERT (offset <= PAGE_SIZE);
 
-        if (host->mxh_hostname != NULL)
-                MXLND_FREE(host->mxh_hostname, strlen(host->mxh_hostname) + 1);
+                if (offset == PAGE_SIZE) {
+                        offset = 0;
+                        ipage++;
+                        LASSERT (ipage <= MXLND_TX_MSG_PAGES());
+                }
 
-        MXLND_FREE(host, sizeof(*host));
-        return;
+                /* in startup(), no locks required */
+                cfs_list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle);
+        }
+
+        return 0;
 }
 
 /**
- * mxlnd_free_hosts - free kmx_hosts
+ * mxlnd_free_peers - free peers
  *
  * Called from mxlnd_shutdown()
  */
 void
-mxlnd_free_hosts(void)
-{
-        struct kmx_host         *host   = NULL;
-        struct kmx_host         *next   = NULL;
-
-        list_for_each_entry_safe(host, next, &kmxlnd_data.kmx_hosts, mxh_list) {
-                list_del_init(&host->mxh_list);
-                mxlnd_host_free(host);
-        }
-        return;
-}
-
-#define xstr(s) #s
-#define str(s) xstr(s)
-#define MXLND_MAX_BOARD 4       /* we expect hosts to have fewer NICs than this */
-#define MXLND_MAX_EP_ID 16      /* we expect hosts to have less than this endpoints */
-
-/* this parses a line that consists of:
- * 
- * IP              HOSTNAME           BOARD        ENDPOINT ID
- * 169.192.0.113   mds01              0            3
- * 
- * By default MX uses the alias (short hostname). If you override
- * it using mx_hostname to use the FQDN or some other name, the hostname
- * here must match exactly.
- */
-
-/* MX_MAX_HOSTNAME_LEN = 80. See myriexpress.h */
-int
-mxlnd_parse_line(char *line)
-{
-        int             i               = 0;
-        int             ret             = 0;
-        int             len             = 0;
-        u32             ip[4]           = { 0, 0, 0, 0 };
-        char            hostname[MX_MAX_HOSTNAME_LEN];
-        u32             board           = -1;
-        u32             ep_id           = -1;
-        struct kmx_host *host           = NULL;
-
-        if (line == NULL) return -1;
-
-        len = strlen(line);
-
-        if (len == 0) return -1;
-
-        /* convert tabs to spaces */
-        for (i = 0; i < len; i++) {
-                if (line[i] == '\t') line[i] = ' ';
-        }
-
-        memset(&hostname, 0 , sizeof(hostname));
-        ret = sscanf(line, "%d.%d.%d.%d %" str(MX_MAX_HOSTNAME_LEN) "s %d %d", 
-                     &ip[0], &ip[1], &ip[2], &ip[3], hostname, &board, &ep_id);
-
-        if (ret != 7) {
-                return -1;
-        }
-
-        /* check for valid values */
-        /* we assume a valid IP address (all <= 255), number of NICs,
-         * and number of endpoint IDs */
-        if (ip[0] > 255 || ip [1] > 255 || ip[2] > 255 || ip[3] > 255 ||
-            board > MXLND_MAX_BOARD || ep_id > MXLND_MAX_EP_ID) {
-                CDEBUG(D_NETERROR, "Illegal value in \"%s\". Ignoring "
-                                   "this host.\n", line);
-                return -1;
-        }
-
-        ret = mxlnd_host_alloc(&host);
-        if (ret != 0) return -1;
-
-        host->mxh_addr = ((ip[0]<<24)|(ip[1]<<16)|(ip[2]<<8)|ip[3]);
-        len = strlen(hostname);
-        MXLND_ALLOC(host->mxh_hostname, len + 1);
-        memset(host->mxh_hostname, 0, len + 1);
-        strncpy(host->mxh_hostname, hostname, len);
-        host->mxh_board = board;
-        host->mxh_ep_id = ep_id;
-
-        spin_lock(&kmxlnd_data.kmx_hosts_lock);
-        list_add_tail(&host->mxh_list, &kmxlnd_data.kmx_hosts);
-        spin_unlock(&kmxlnd_data.kmx_hosts_lock);
-
-        return 0;
-}
-
-void
-mxlnd_print_hosts(void)
-{
-#if MXLND_DEBUG
-        struct kmx_host         *host   = NULL;
-
-        list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
-                int             ip[4];
-                u32             addr    = host->mxh_addr;
-
-                ip[0] = (addr >> 24) & 0xff;
-                ip[1] = (addr >> 16) & 0xff;
-                ip[2] = (addr >>  8) & 0xff;
-                ip[3] = addr & 0xff;
-                CDEBUG(D_NET, "\tip= %d.%d.%d.%d\n\thost= %s\n\tboard= %d\n\tep_id= %d\n\n",
-                            ip[0], ip[1], ip[2], ip[3],
-                            host->mxh_hostname, host->mxh_board, host->mxh_ep_id);
-        }
-#endif
-        return;
-}
-
-#define MXLND_BUFSIZE (PAGE_SIZE - 1)
-
-int
-mxlnd_parse_hosts(char *filename)
+mxlnd_free_peers(void)
 {
-        int             ret             = 0;
-        s32             size            = 0;
-        s32             bufsize         = MXLND_BUFSIZE;
-        s32             allocd          = 0;
-        loff_t          offset          = 0;
-        struct file     *filp           = NULL;
-        char            *buf            = NULL;
-        s32             buf_off         = 0;
-        char            *sep            = NULL;
-        char            *line           = NULL;
-
-        if (filename == NULL) return -1;
-
-        filp = filp_open(filename, O_RDONLY, 0);
-        if (IS_ERR(filp)) {
-                CERROR("filp_open() failed for %s\n", filename);
-                return -1;
-        }
-
-        size = (s32) cfs_filp_size(filp);
-        if (size < MXLND_BUFSIZE) bufsize = size;
-        allocd = bufsize;
-        MXLND_ALLOC(buf, allocd + 1);
-        if (buf == NULL) {
-                CERROR("Cannot allocate buf\n");
-                filp_close(filp, current->files);
-                return -1;
-        }
-
-        while (offset < size) {
-                memset(buf, 0, bufsize + 1);
-                ret = kernel_read(filp, (unsigned long) offset, buf, (unsigned long) bufsize);
-                if (ret < 0) {
-                        CDEBUG(D_NETERROR, "kernel_read() returned %d - closing %s\n", ret, filename);
-                        filp_close(filp, current->files);
-                        MXLND_FREE(buf, allocd + 1);
-                        return -1;
-                }
+        int             i      = 0;
+        int             count  = 0;
+        kmx_peer_t     *peer   = NULL;
+        kmx_peer_t     *next   = NULL;
 
-                if (ret < bufsize) bufsize = ret;
-                buf_off = 0;
-                while (buf_off < bufsize) {
-                        sep = strchr(buf + buf_off, '\n');
-                        if (sep != NULL) {
-                                /* we have a line */
-                                line = buf + buf_off;
-                                *sep = '\0';
-                                ret = mxlnd_parse_line(line);
-                                if (ret != 0 && strlen(line) != 0) {
-                                        CDEBUG(D_NETERROR, "Failed to parse \"%s\". Ignoring this host.\n", line);
-                                }
-                                buf_off += strlen(line) + 1;
-                        } else {
-                                /* last line or we need to read more */
-                                line = buf + buf_off;
-                                ret = mxlnd_parse_line(line);
-                                if (ret != 0) {
-                                        bufsize -= strlen(line) + 1;
-                                }
-                                buf_off += strlen(line) + 1;
-                        }
+        for (i = 0; i < MXLND_HASH_SIZE; i++) {
+                cfs_list_for_each_entry_safe(peer, next,
+                                             &kmxlnd_data.kmx_peers[i],
+                                             mxp_list) {
+                        cfs_list_del_init(&peer->mxp_list);
+                        if (peer->mxp_conn) mxlnd_conn_decref(peer->mxp_conn);
+                        mxlnd_peer_decref(peer);
+                        count++;
                 }
-                offset += bufsize;
-                bufsize = MXLND_BUFSIZE;
         }
-
-        MXLND_FREE(buf, allocd + 1);
-        filp_close(filp, current->files);
-        mxlnd_print_hosts();
-
-        return 0;
+        CDEBUG(D_NET, "%s: freed %d peers\n", __func__, count);
 }
 
 /**
- * mxlnd_init_mx - open the endpoint, set out ID, register the EAGER callback
+ * mxlnd_init_mx - open the endpoint, set our ID, register the EAGER callback
  * @ni - the network interface
  *
  * Returns 0 on success, -1 on failure
@@ -510,13 +293,14 @@ int
 mxlnd_init_mx(lnet_ni_t *ni)
 {
         int                     ret     = 0;
-        int                     found   = 0;
         mx_return_t             mxret;
-        mx_endpoint_addr_t      addr;
         u32                     board   = *kmxlnd_tunables.kmx_board;
         u32                     ep_id   = *kmxlnd_tunables.kmx_ep_id;
         u64                     nic_id  = 0LL;
-        struct kmx_host         *host   = NULL;
+        char                    *ifname = NULL;
+        __u32                   ip;
+        __u32                   netmask;
+        int                     if_up   = 0;
 
         mxret = mx_init();
         if (mxret != MX_SUCCESS) {
@@ -524,48 +308,51 @@ mxlnd_init_mx(lnet_ni_t *ni)
                 return -1;
         }
 
-        ret = mxlnd_parse_hosts(*kmxlnd_tunables.kmx_hosts);
-        if (ret != 0) {
-                if (*kmxlnd_tunables.kmx_hosts != NULL) {
-                        CERROR("mxlnd_parse_hosts(%s) failed\n", *kmxlnd_tunables.kmx_hosts);
+        if (ni->ni_interfaces[0] != NULL) {
+                /* Use the IPoMX interface specified in 'networks=' */
+
+                CLASSERT (LNET_MAX_INTERFACES > 1);
+                if (ni->ni_interfaces[1] != NULL) {
+                        CERROR("Multiple interfaces not supported\n");
+                        goto failed_with_init;
                 }
-                mx_finalize();
-                return -1;
+
+                ifname = ni->ni_interfaces[0];
+        } else {
+                ifname = *kmxlnd_tunables.kmx_default_ipif;
         }
 
-        list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
-                if (strcmp(host->mxh_hostname, system_utsname.nodename) == 0) {
-                        /* override the defaults and module parameters with 
-                         * the info from the hosts file */
-                        board = host->mxh_board;
-                        ep_id = host->mxh_ep_id;
-                        kmxlnd_data.kmx_localhost = host;
-                        CDEBUG(D_NET, "my hostname is %s board %d ep_id %d\n", kmxlnd_data.kmx_localhost->mxh_hostname, kmxlnd_data.kmx_localhost->mxh_board, kmxlnd_data.kmx_localhost->mxh_ep_id);
-                        found = 1;
-                        break;
-                }
+        ret = libcfs_ipif_query(ifname, &if_up, &ip, &netmask);
+        if (ret != 0) {
+                CERROR("Can't query IPoMX interface %s: %d\n",
+                       ifname, ret);
+                goto failed_with_init;
         }
 
-        if (found == 0) {
-                CERROR("no host entry found for localhost\n");
-                mx_finalize();
-                return -1;
+        if (!if_up) {
+                CERROR("Can't query IPoMX interface %s: it's down\n",
+                       ifname);
+                goto failed_with_init;
         }
 
-        mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC, 
+        mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC,
                                  NULL, 0, &kmxlnd_data.kmx_endpt);
         if (mxret != MX_SUCCESS) {
                 CERROR("mx_open_endpoint() failed with %d\n", mxret);
-                mx_finalize();
-                return -1;
+                goto failed_with_init;
         }
 
-        mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &addr);
-        mx_decompose_endpoint_addr(addr, &nic_id, &ep_id);
-
-        LASSERT(host != NULL);
-        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), host->mxh_addr);
+        mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &kmxlnd_data.kmx_epa);
+        mx_decompose_endpoint_addr(kmxlnd_data.kmx_epa, &nic_id, &ep_id);
+        mxret = mx_connect(kmxlnd_data.kmx_endpt, nic_id, ep_id,
+                           MXLND_MSG_MAGIC, MXLND_CONNECT_TIMEOUT/CFS_HZ*1000,
+                           &kmxlnd_data.kmx_epa);
+        if (mxret != MX_SUCCESS) {
+                CNETERR("unable to connect to myself (%s)\n", mx_strerror(mxret));
+                goto failed_with_endpoint;
+        }
 
+        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
         CDEBUG(D_NET, "My NID is 0x%llx\n", ni->ni_nid);
 
         /* this will catch all unexpected receives. */
@@ -573,21 +360,24 @@ mxlnd_init_mx(lnet_ni_t *ni)
                                           (mx_unexp_handler_t) mxlnd_unexpected_recv,
                                           NULL);
         if (mxret != MX_SUCCESS) {
-                CERROR("mx_register_unexp_callback() failed with %s\n", 
+                CERROR("mx_register_unexp_callback() failed with %s\n",
                          mx_strerror(mxret));
-                mx_close_endpoint(kmxlnd_data.kmx_endpt);
-                mx_finalize();
-                return -1;
+                goto failed_with_endpoint;
         }
-        mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL, MXLND_COMM_TIMEOUT/HZ*1000);
+        mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL,
+                                       MXLND_COMM_TIMEOUT/CFS_HZ*1000);
         if (mxret != MX_SUCCESS) {
-                CERROR("mx_set_request_timeout() failed with %s\n", 
+                CERROR("mx_set_request_timeout() failed with %s\n",
                         mx_strerror(mxret));
-                mx_close_endpoint(kmxlnd_data.kmx_endpt);
-                mx_finalize();
-                return -1;
+                goto failed_with_endpoint;
         }
         return 0;
+
+failed_with_endpoint:
+        mx_close_endpoint(kmxlnd_data.kmx_endpt);
+failed_with_init:
+        mx_finalize();
+        return -1;
 }
 
 
@@ -604,13 +394,13 @@ mxlnd_thread_start(int (*fn)(void *arg), void *arg)
         int     pid = 0;
         int     i   = (int) ((long) arg);
 
-        atomic_inc(&kmxlnd_data.kmx_nthreads);
-        init_completion(&kmxlnd_data.kmx_completions[i]);
+        cfs_atomic_inc(&kmxlnd_data.kmx_nthreads);
+        cfs_init_completion(&kmxlnd_data.kmx_completions[i]);
 
-        pid = kernel_thread (fn, arg, 0);
-        if (pid <= 0) {
-                CERROR("mx_thread_start() failed with %d\n", pid);
-                atomic_dec(&kmxlnd_data.kmx_nthreads);
+        pid = cfs_kernel_thread (fn, arg, 0);
+        if (pid < 0) {
+                CERROR("cfs_kernel_thread() failed with %d\n", pid);
+                cfs_atomic_dec(&kmxlnd_data.kmx_nthreads);
         }
         return pid;
 }
@@ -625,8 +415,8 @@ void
 mxlnd_thread_stop(long id)
 {
         int     i       = (int) id;
-        atomic_dec (&kmxlnd_data.kmx_nthreads);
-        complete(&kmxlnd_data.kmx_completions[i]);
+        cfs_atomic_dec (&kmxlnd_data.kmx_nthreads);
+        cfs_complete(&kmxlnd_data.kmx_completions[i]);
 }
 
 /**
@@ -638,27 +428,34 @@ mxlnd_thread_stop(long id)
 void
 mxlnd_shutdown (lnet_ni_t *ni)
 {
-        int             i               = 0;
+        int                     i               = 0;
+        int                     nthreads        = MXLND_NDAEMONS
+                                                  + *kmxlnd_tunables.kmx_n_waitd;
 
         LASSERT (ni == kmxlnd_data.kmx_ni);
         LASSERT (ni->ni_data == &kmxlnd_data);
         CDEBUG(D_NET, "in shutdown()\n");
 
         CDEBUG(D_MALLOC, "before MXLND cleanup: libcfs_kmemory %d "
-                         "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), 
+                         "kmx_mem_used %ld\n", cfs_atomic_read(&libcfs_kmemory),
                          kmxlnd_data.kmx_mem_used);
 
+
+        CDEBUG(D_NET, "setting shutdown = 1\n");
+        cfs_atomic_set(&kmxlnd_data.kmx_shutdown, 1);
+
         switch (kmxlnd_data.kmx_init) {
 
         case MXLND_INIT_ALL:
 
-                CDEBUG(D_NET, "setting shutdown = 1\n");
-                /* set shutdown and wakeup request_waitds */
-                kmxlnd_data.kmx_shutdown = 1;
-                mb();
+                /* calls write_[un]lock(kmx_global_lock) */
+                mxlnd_del_peer(LNET_NID_ANY);
+
+                /* wakeup request_waitds */
                 mx_wakeup(kmxlnd_data.kmx_endpt);
-                up(&kmxlnd_data.kmx_tx_queue_sem);
-                mxlnd_sleep(2 * HZ);
+                cfs_up(&kmxlnd_data.kmx_tx_queue_sem);
+                cfs_up(&kmxlnd_data.kmx_conn_sem);
+                mxlnd_sleep(2 * CFS_HZ);
 
                 /* fall through */
 
@@ -666,14 +463,14 @@ mxlnd_shutdown (lnet_ni_t *ni)
 
                 CDEBUG(D_NET, "waiting on threads\n");
                 /* wait for threads to complete */
-                for (i = 0; i < MXLND_NCOMPLETIONS; i++) {
-                        wait_for_completion(&kmxlnd_data.kmx_completions[i]);
+                for (i = 0; i < nthreads; i++) {
+                        cfs_wait_for_completion(&kmxlnd_data.kmx_completions[i]);
                 }
-                LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
+                LASSERT(cfs_atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
 
                 CDEBUG(D_NET, "freeing completions\n");
-                MXLND_FREE(kmxlnd_data.kmx_completions, 
-                            MXLND_NCOMPLETIONS * sizeof(struct completion));
+                MXLND_FREE(kmxlnd_data.kmx_completions,
+                            nthreads * sizeof(cfs_completion_t));
 
                 /* fall through */
 
@@ -681,26 +478,10 @@ mxlnd_shutdown (lnet_ni_t *ni)
 
                 CDEBUG(D_NET, "stopping mx\n");
 
-                /* wakeup waiters if they missed the above.
-                 * close endpoint to stop all traffic.
-                 * this will cancel and cleanup all requests, etc. */
-
-                mx_wakeup(kmxlnd_data.kmx_endpt);
+                /* no peers left, close the endpoint */
                 mx_close_endpoint(kmxlnd_data.kmx_endpt);
                 mx_finalize();
 
-                CDEBUG(D_NET, "mxlnd_free_hosts();\n");
-                mxlnd_free_hosts();
-
-                /* fall through */
-
-        case MXLND_INIT_RXS:
-
-                CDEBUG(D_NET, "freeing rxs\n");
-
-                /* free all rxs and associated pages */
-                mxlnd_free_rxs();
-
                 /* fall through */
 
         case MXLND_INIT_TXS:
@@ -716,9 +497,12 @@ mxlnd_shutdown (lnet_ni_t *ni)
 
                 CDEBUG(D_NET, "freeing peers\n");
 
-                /* free peer list */
+                /* peers should be gone, but check again */
                 mxlnd_free_peers();
 
+                /* conn zombies should be gone, but check again */
+                mxlnd_free_conn_zombies();
+
                 /* fall through */
 
         case MXLND_INIT_NOTHING:
@@ -727,7 +511,7 @@ mxlnd_shutdown (lnet_ni_t *ni)
         CDEBUG(D_NET, "shutdown complete\n");
 
         CDEBUG(D_MALLOC, "after MXLND cleanup: libcfs_kmemory %d "
-                         "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), 
+                         "kmx_mem_used %ld\n", cfs_atomic_read(&libcfs_kmemory),
                          kmxlnd_data.kmx_mem_used);
 
         kmxlnd_data.kmx_init = MXLND_INIT_NOTHING;
@@ -745,9 +529,11 @@ mxlnd_shutdown (lnet_ni_t *ni)
 int
 mxlnd_startup (lnet_ni_t *ni)
 {
-        int                     i       = 0;
-        int                     ret     = 0;
-        struct timeval          tv;
+        int             i               = 0;
+        int             ret             = 0;
+        int             nthreads        = MXLND_NDAEMONS /* tx_queued, timeoutd, connd */
+                                          + *kmxlnd_tunables.kmx_n_waitd;
+        struct timeval  tv;
 
         LASSERT (ni->ni_lnd == &the_kmxlnd);
 
@@ -756,12 +542,13 @@ mxlnd_startup (lnet_ni_t *ni)
                 return -EPERM;
         }
         CDEBUG(D_MALLOC, "before MXLND startup: libcfs_kmemory %d "
-                         "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), 
+                         "kmx_mem_used %ld\n", cfs_atomic_read(&libcfs_kmemory),
                          kmxlnd_data.kmx_mem_used);
 
-        /* reserve 1/2 of tx for connect request messages */
-        ni->ni_maxtxcredits = *kmxlnd_tunables.kmx_ntx / 2;
-        ni->ni_peertxcredits = *kmxlnd_tunables.kmx_credits;
+        ni->ni_maxtxcredits = MXLND_TX_MSGS();
+        ni->ni_peertxcredits = *kmxlnd_tunables.kmx_peercredits;
+        if (ni->ni_maxtxcredits < ni->ni_peertxcredits)
+                ni->ni_maxtxcredits = ni->ni_peertxcredits;
 
         PORTAL_MODULE_USE;
         memset (&kmxlnd_data, 0, sizeof (kmxlnd_data));
@@ -769,37 +556,30 @@ mxlnd_startup (lnet_ni_t *ni)
         kmxlnd_data.kmx_ni = ni;
         ni->ni_data = &kmxlnd_data;
 
-        do_gettimeofday(&tv);
+        cfs_gettimeofday(&tv);
         kmxlnd_data.kmx_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-        CDEBUG(D_NET, "my incarnation is %lld\n", kmxlnd_data.kmx_incarnation);
-
-        spin_lock_init (&kmxlnd_data.kmx_global_lock);
+        CDEBUG(D_NET, "my incarnation is %llu\n", kmxlnd_data.kmx_incarnation);
 
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_req);
-        spin_lock_init (&kmxlnd_data.kmx_conn_lock);
-        sema_init(&kmxlnd_data.kmx_conn_sem, 0);
+        cfs_rwlock_init (&kmxlnd_data.kmx_global_lock);
+        cfs_spin_lock_init (&kmxlnd_data.kmx_mem_lock);
 
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_hosts);
-        spin_lock_init (&kmxlnd_data.kmx_hosts_lock);
+        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_reqs);
+        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_zombies);
+        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_orphan_msgs);
+        cfs_spin_lock_init (&kmxlnd_data.kmx_conn_lock);
+        cfs_sema_init(&kmxlnd_data.kmx_conn_sem, 0);
 
         for (i = 0; i < MXLND_HASH_SIZE; i++) {
-                INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]);
+                CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]);
         }
-        rwlock_init (&kmxlnd_data.kmx_peers_lock);
 
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_txs);
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle);
-        spin_lock_init (&kmxlnd_data.kmx_tx_idle_lock);
+        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle);
+        cfs_spin_lock_init (&kmxlnd_data.kmx_tx_idle_lock);
         kmxlnd_data.kmx_tx_next_cookie = 1;
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_queue);
-        spin_lock_init (&kmxlnd_data.kmx_tx_queue_lock);
-        sema_init(&kmxlnd_data.kmx_tx_queue_sem, 0);
-
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_rxs);
-        spin_lock_init (&kmxlnd_data.kmx_rxs_lock);
-        INIT_LIST_HEAD (&kmxlnd_data.kmx_rx_idle);
-        spin_lock_init (&kmxlnd_data.kmx_rx_idle_lock);
-        
+        CFS_INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_queue);
+        cfs_spin_lock_init (&kmxlnd_data.kmx_tx_queue_lock);
+        cfs_sema_init(&kmxlnd_data.kmx_tx_queue_sem, 0);
+
         kmxlnd_data.kmx_init = MXLND_INIT_DATA;
         /*****************************************************/
 
@@ -811,14 +591,6 @@ mxlnd_startup (lnet_ni_t *ni)
         kmxlnd_data.kmx_init = MXLND_INIT_TXS;
         /*****************************************************/
 
-        ret = mxlnd_init_rxs();
-        if (ret != 0) {
-                CERROR("Can't alloc rx descs: %d\n", ret);
-                goto failed;
-        }
-        kmxlnd_data.kmx_init = MXLND_INIT_RXS;
-        /*****************************************************/
-
         ret = mxlnd_init_mx(ni);
         if (ret != 0) {
                 CERROR("Can't init mx\n");
@@ -830,85 +602,100 @@ mxlnd_startup (lnet_ni_t *ni)
 
         /* start threads */
 
-        MXLND_ALLOC (kmxlnd_data.kmx_completions,
-                      MXLND_NCOMPLETIONS * sizeof(struct completion));
+        MXLND_ALLOC(kmxlnd_data.kmx_completions,
+                     nthreads * sizeof(cfs_completion_t));
         if (kmxlnd_data.kmx_completions == NULL) {
-                CERROR("failed to alloc kmxlnd_data.kmx_completions");
+                CERROR("failed to alloc kmxlnd_data.kmx_completions\n");
                 goto failed;
         }
-        memset(kmxlnd_data.kmx_completions, 0, 
-               MXLND_NCOMPLETIONS * sizeof(struct completion));
+        memset(kmxlnd_data.kmx_completions, 0,
+               nthreads * sizeof(cfs_completion_t));
 
-        {
-                int     i               = 0;
-                if (MXLND_N_SCHED > *kmxlnd_tunables.kmx_n_waitd) {
-                        *kmxlnd_tunables.kmx_n_waitd = MXLND_N_SCHED;
-                }
-                CDEBUG(D_NET, "using %d %s in mx_wait_any()\n",
-                        *kmxlnd_tunables.kmx_n_waitd, 
-                        *kmxlnd_tunables.kmx_n_waitd == 1 ? "thread" : "threads");
-
-                for (i = 0; i < *kmxlnd_tunables.kmx_n_waitd; i++) {
-                        ret = mxlnd_thread_start(mxlnd_request_waitd, (void*)((long)i));
-                        if (ret < 0) {
-                                CERROR("Starting mxlnd_request_waitd[%d] failed with %d\n", i, ret);
-                                for (--i; i >= 0; i--) {
-                                        wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-                                }
-                                LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-                                MXLND_FREE(kmxlnd_data.kmx_completions, 
-                                        MXLND_NCOMPLETIONS * sizeof(struct completion));
-
-                                goto failed;
-                        }
-                }
-                ret = mxlnd_thread_start(mxlnd_tx_queued, (void*)((long)i++));
+        CDEBUG(D_NET, "using %d %s in mx_wait_any()\n",
+                *kmxlnd_tunables.kmx_n_waitd,
+                *kmxlnd_tunables.kmx_n_waitd == 1 ? "thread" : "threads");
+
+        for (i = 0; i < *kmxlnd_tunables.kmx_n_waitd; i++) {
+                ret = mxlnd_thread_start(mxlnd_request_waitd, (void*)((long)i));
                 if (ret < 0) {
-                        CERROR("Starting mxlnd_tx_queued failed with %d\n", ret);
+                        CERROR("Starting mxlnd_request_waitd[%d] failed with %d\n", i, ret);
+                        cfs_atomic_set(&kmxlnd_data.kmx_shutdown, 1);
+                        mx_wakeup(kmxlnd_data.kmx_endpt);
                         for (--i; i >= 0; i--) {
-                                wait_for_completion(&kmxlnd_data.kmx_completions[i]);
+                                cfs_wait_for_completion(&kmxlnd_data.kmx_completions[i]);
                         }
-                        LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-                        MXLND_FREE(kmxlnd_data.kmx_completions, 
-                                MXLND_NCOMPLETIONS * sizeof(struct completion));
+                        LASSERT(cfs_atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
+                        MXLND_FREE(kmxlnd_data.kmx_completions,
+                                nthreads * sizeof(cfs_completion_t));
+
                         goto failed;
                 }
-                ret = mxlnd_thread_start(mxlnd_timeoutd, (void*)((long)i++));
-                if (ret < 0) {
-                        CERROR("Starting mxlnd_timeoutd failed with %d\n", ret);
-                        for (--i; i >= 0; i--) {
-                                wait_for_completion(&kmxlnd_data.kmx_completions[i]);
-                        }
-                        LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
-                        MXLND_FREE(kmxlnd_data.kmx_completions, 
-                                MXLND_NCOMPLETIONS * sizeof(struct completion));
-                        goto failed;
+        }
+        ret = mxlnd_thread_start(mxlnd_tx_queued, (void*)((long)i++));
+        if (ret < 0) {
+                CERROR("Starting mxlnd_tx_queued failed with %d\n", ret);
+                cfs_atomic_set(&kmxlnd_data.kmx_shutdown, 1);
+                mx_wakeup(kmxlnd_data.kmx_endpt);
+                for (--i; i >= 0; i--) {
+                        cfs_wait_for_completion(&kmxlnd_data.kmx_completions[i]);
                 }
+                LASSERT(cfs_atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
+                MXLND_FREE(kmxlnd_data.kmx_completions,
+                        nthreads * sizeof(cfs_completion_t));
+                goto failed;
+        }
+        ret = mxlnd_thread_start(mxlnd_timeoutd, (void*)((long)i++));
+        if (ret < 0) {
+                CERROR("Starting mxlnd_timeoutd failed with %d\n", ret);
+                cfs_atomic_set(&kmxlnd_data.kmx_shutdown, 1);
+                mx_wakeup(kmxlnd_data.kmx_endpt);
+                cfs_up(&kmxlnd_data.kmx_tx_queue_sem);
+                for (--i; i >= 0; i--) {
+                        cfs_wait_for_completion(&kmxlnd_data.kmx_completions[i]);
+                }
+                LASSERT(cfs_atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
+                MXLND_FREE(kmxlnd_data.kmx_completions,
+                        nthreads * sizeof(cfs_completion_t));
+                goto failed;
+        }
+        ret = mxlnd_thread_start(mxlnd_connd, (void*)((long)i++));
+        if (ret < 0) {
+                CERROR("Starting mxlnd_connd failed with %d\n", ret);
+                cfs_atomic_set(&kmxlnd_data.kmx_shutdown, 1);
+                mx_wakeup(kmxlnd_data.kmx_endpt);
+                cfs_up(&kmxlnd_data.kmx_tx_queue_sem);
+                for (--i; i >= 0; i--) {
+                        cfs_wait_for_completion(&kmxlnd_data.kmx_completions[i]);
+                }
+                LASSERT(cfs_atomic_read(&kmxlnd_data.kmx_nthreads) == 0);
+                MXLND_FREE(kmxlnd_data.kmx_completions,
+                        nthreads * sizeof(cfs_completion_t));
+                goto failed;
         }
 
         kmxlnd_data.kmx_init = MXLND_INIT_THREADS;
         /*****************************************************/
-        
+
         kmxlnd_data.kmx_init = MXLND_INIT_ALL;
         CDEBUG(D_MALLOC, "startup complete (kmx_mem_used %ld)\n", kmxlnd_data.kmx_mem_used);
-        
+
         return 0;
 failed:
         CERROR("mxlnd_startup failed\n");
-        mxlnd_shutdown (ni);    
+        mxlnd_shutdown(ni);
         return (-ENETDOWN);
 }
 
 static int mxlnd_init(void)
 {
         lnet_register_lnd(&the_kmxlnd);
-	return 0;
+        return 0;
 }
 
 static void mxlnd_exit(void)
 {
         lnet_unregister_lnd(&the_kmxlnd);
-	return;
+        return;
 }
 
 module_init(mxlnd_init);
@@ -917,4 +704,4 @@ module_exit(mxlnd_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Myricom, Inc. - help@myri.com");
 MODULE_DESCRIPTION("Kernel MyrinetExpress LND");
-MODULE_VERSION("0.5.0");
+MODULE_VERSION("0.6.0");