Whamcloud - gitweb
Land b1_x_lnet_gate onto b1_x (20081219_0014)
authorjohann <johann>
Thu, 18 Dec 2008 23:56:46 +0000 (23:56 +0000)
committerjohann <johann>
Thu, 18 Dec 2008 23:56:46 +0000 (23:56 +0000)
b=18078

Land the b1_x_lnet_gate to b1_x (used by b1_6/b1_8) since we are going to retire
b1_8_gate.

31 files changed:
lnet/ChangeLog
lnet/autoconf/lustre-lnet.m4
lnet/include/libcfs/libcfs.h
lnet/include/libcfs/linux/kp30.h
lnet/klnds/iiblnd/iiblnd_modparams.c
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/klnds/socklnd/socklnd_lib-darwin.c
lnet/klnds/socklnd/socklnd_lib-linux.c
lnet/klnds/socklnd/socklnd_modparams.c
lnet/klnds/viblnd/viblnd.c
lnet/klnds/viblnd/viblnd_cb.c
lnet/klnds/viblnd/viblnd_modparams.c
lnet/libcfs/debug.c
lnet/lnet/api-ni.c
lnet/lnet/lib-eq.c
lnet/lnet/lib-md.c
lnet/lnet/lib-me.c
lnet/lnet/lib-move.c
lnet/lnet/router_proc.c
lnet/selftest/rpc.c
lnet/selftest/selftest.h
lnet/ulnds/ptllnd/ptllnd.c
lnet/ulnds/ptllnd/ptllnd.h
lnet/ulnds/ptllnd/ptllnd_cb.c
lnet/utils/debug.c

index 72baa91..b650e33 100644 (file)
@@ -1,5 +1,5 @@
 tbd  Sun Microsystems, Inc.
-       * version 1.6.8
+       * version 1.8.1
        * Support for networks:
         socklnd   - any kernel supported by Lustre,
         qswlnd    - Qsnet kernel modules 5.20 and later,
@@ -12,16 +12,16 @@ tbd  Sun Microsystems, Inc.
         mxlnd     - MX 1.2.1 or later,
         ptllnd    - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
 
-Severity   : 
-Bugzilla   : 
-Description: 
-Details    : 
+Severity   :
+Bugzilla   :
+Description:
+Details    :
 
 
---------------------------------------------------------------------------
+-------------------------------------------------------------------------------
 
-01-15-2009  Sun Microsystems, Inc.
-       * version 1.6.7
+12-31-2008  Sun Microsystems, Inc.
+       * version 1.8.0
        * Support for networks:
         socklnd   - any kernel supported by Lustre,
         qswlnd    - Qsnet kernel modules 5.20 and later,
@@ -39,27 +39,51 @@ Bugzilla   :
 Description: 
 Details    : 
 
+Severity   : major
+Bugzilla   : 15983
+Description: workaround for OOM from o2iblnd
+Details    : OFED needs to allocate a big chunk of memory for the QP while
+             creating a connection for o2iblnd; OOM can happen if no such
+             contiguous memory chunk is available.
+             QP size is decided by concurrent_sends and max_fragments of
+             o2iblnd; we now permit the user to specify a smaller value for
+             concurrent_sends of o2iblnd (e.g. concurrent_sends=7), which
+             will decrease the memory block size required to create the QP.
 
---------------------------------------------------------------------------
+Severity   : major
+Bugzilla   : 15093
+Description: Support Zerocopy receive of Chelsio device
+Details    : Chelsio driver can support zerocopy for iov[1] if it's
+             contiguous and large enough.
 
-2008-08-31  Sun Microsystems, Inc.
-       * version 1.6.6
-       * Support for networks:
-        socklnd   - any kernel supported by Lustre,
-        qswlnd    - Qsnet kernel modules 5.20 and later,
-        openiblnd - IbGold 1.8.2,
-        o2iblnd   - OFED 1.1, 1.2.0, 1.2.5, and 1.3
-        viblnd    - Voltaire ibhost 3.4.5 and later,
-        ciblnd    - Topspin 3.2.0,
-        iiblnd    - Infiniserv 3.3 + PathBits patch,
-        gmlnd     - GM 2.1.22 and later,
-        mxlnd     - MX 1.2.1 or later,
-        ptllnd    - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
+Severity   : normal
+Bugzilla   : 13490
+Description: fix credit flow deadlock in uptllnd
 
-Severity   : 
-Bugzilla   : 
-Description: 
-Details    : 
+Severity   : normal
+Bugzilla   : 16308
+Description: finalize network operation in reasonable time
+Details    : conf-sanity test_32a couldn't stop ost and mds because it
+             tried to access a non-existent peer and the TCP connect took
+             quite a long time before timing out.
+
+Severity   : major
+Bugzilla   : 16338
+Description: Continuous recovery on 33 of 413 nodes after lustre oss failure
+Details    : A lost reference on a conn prevents the peer from being
+             destroyed, which could prevent new peer creation if the peer
+             count has reached its upper limit.
+
+Severity   : normal
+Bugzilla   : 16102
+Description: LNET Selftest results in Soft lockup on OSS CPU
+Details    : only hits when 8 or more o2ib clients are involved and a session
+             is torn down with 'lst end_session' without preceding 'lst stop'.
+
+Severity   : minor
+Bugzilla   : 16321
+Description: concurrent_sends in IB LNDs should not be changeable at run time
+Details    : the concurrent_sends sysctl entry is now read-only (mode 0444)
 
 Severity   : normal
 Bugzilla   : 15272
index 69e6adf..b7c02e7 100644 (file)
@@ -523,6 +523,7 @@ if test $ENABLEO2IB -eq 0; then
        AC_MSG_RESULT([disabled])
 else
        o2ib_found=false
+
        for O2IBPATH in $O2IBPATHS; do
                if test \( -f ${O2IBPATH}/include/rdma/rdma_cm.h -a \
                           -f ${O2IBPATH}/include/rdma/ib_cm.h -a \
@@ -530,8 +531,9 @@ else
                           -f ${O2IBPATH}/include/rdma/ib_fmr_pool.h \); then
                        o2ib_found=true
                        break
-               fi
+               fi
        done
+
        if ! $o2ib_found; then
                AC_MSG_RESULT([no])
                case $ENABLEO2IB in
@@ -599,23 +601,41 @@ else
                        fi
                fi
 
-               # version checking is a hack and isn't reliable,
-               # we need verify it with each new ofed release
-
-               if grep -q ib_dma_map_single \
-                       ${O2IBPATH}/include/rdma/ib_verbs.h; then
-                       if grep -q comp_vector \
-                               ${O2IBPATH}/include/rdma/ib_verbs.h; then
-                               IBLND_OFED_VERSION="1025"
-                       else
-                               IBLND_OFED_VERSION="1020"
-                       fi
-               else
-                       IBLND_OFED_VERSION="1010"
-               fi
+               LB_LINUX_TRY_COMPILE([
+                       #include <linux/version.h>
+                       #include <linux/pci.h>
+                       #if !HAVE_GFP_T
+                       typedef int gfp_t;
+                       #endif
+                       #include <rdma/ib_verbs.h>
+               ],[
+                       ib_dma_map_single(NULL, NULL, 0, 0);
+                       return 0;
+               ],[
+                       AC_MSG_RESULT(yes)
+                       AC_DEFINE(HAVE_OFED_IB_DMA_MAP, 1,
+                                 [ib_dma_map_single defined])
+               ],[
+                       AC_MSG_RESULT(NO)
+               ])
 
-               AC_DEFINE_UNQUOTED(IBLND_OFED_VERSION, $IBLND_OFED_VERSION,
-                                  [OFED version])
+               LB_LINUX_TRY_COMPILE([
+                       #include <linux/version.h>
+                       #include <linux/pci.h>
+                       #if !HAVE_GFP_T
+                       typedef int gfp_t;
+                       #endif
+                       #include <rdma/ib_verbs.h>
+               ],[
+                       ib_create_cq(NULL, NULL, NULL, NULL, 0, 0);
+                       return 0;
+               ],[
+                       AC_MSG_RESULT(yes)
+                       AC_DEFINE(HAVE_OFED_IB_COMP_VECTOR, 1,
+                                 [has completion vector])
+               ],[
+                       AC_MSG_RESULT(NO)
+               ])
 
                EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
        fi
@@ -1193,7 +1213,7 @@ LB_LINUX_TRY_COMPILE([
         AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1,
                 [kmem_cache_destroy(cachep) return int])
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
 ])
 ])
 
@@ -1212,7 +1232,7 @@ LB_LINUX_TRY_COMPILE([
        AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1,
                [panic_notifier_list is atomic_notifier_head])
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
 ])
 ])
 
@@ -1231,7 +1251,7 @@ LB_LINUX_TRY_COMPILE([
         AC_DEFINE(HAVE_3ARGS_INIT_WORK, 1,
                   [INIT_WORK use 3 args and store data inside])
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
 ])
 ])
 
@@ -1248,7 +1268,7 @@ LB_LINUX_TRY_COMPILE([
         AC_DEFINE(HAVE_2ARGS_REGISTER_SYSCTL, 1,
                   [register_sysctl_table want 2 args])
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
 ])
 ])
 
@@ -1270,7 +1290,7 @@ LB_LINUX_TRY_COMPILE([
         AC_DEFINE(HAVE_KMEM_CACHE, 1,
                   [kernel has struct kmem_cache])
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
 ])
 EXTRA_KCFLAGS="$tmp_flags"
 ])
@@ -1286,7 +1306,7 @@ LB_LINUX_TRY_COMPILE([
         AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DTOR, 1,
                   [kmem_cache_create has dtor argument])
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
 ])
 ])
 
index 4a158c1..5db6433 100644 (file)
@@ -34,6 +34,9 @@
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
 #ifndef __LIBCFS_LIBCFS_H__
 #define __LIBCFS_LIBCFS_H__
 
index b620d55..ac90aa0 100644 (file)
@@ -34,6 +34,9 @@
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
 #ifndef __LIBCFS_LINUX_KP30_H__
 #define __LIBCFS_LINUX_KP30_H__
 
index 4e02eee..8e7212d 100644 (file)
@@ -267,7 +267,7 @@ static cfs_sysctl_table_t kibnal_ctl_table[] = {
                 .procname = "concurrent_sends",
                 .data     = &concurrent_sends,
                 .maxlen   = sizeof(int),
-                .mode     = 0644,
+                .mode     = 0444,
                 .proc_handler = &proc_dointvec
         },
         {0}
@@ -276,7 +276,7 @@ static cfs_sysctl_table_t kibnal_ctl_table[] = {
 static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
         {
                 .ctl_name = CTL_IIBLND,
-                .procname = "openibnal",
+                .procname = "iibnal",
                 .data     = NULL,
                 .maxlen   = 0,
                 .mode     = 0555,
index 313f690..0416c7c 100644 (file)
@@ -738,7 +738,7 @@ kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid, int state)
                 }
         }
 
-#if (IBLND_OFED_VERSION == 1025)
+#ifdef HAVE_OFED_IB_COMP_VECTOR
         cq = ib_create_cq(cmid->device,
                           kiblnd_cq_completion, kiblnd_cq_event, conn,
                           IBLND_CQ_ENTRIES(), 0);
@@ -892,8 +892,8 @@ kiblnd_destroy_conn (kib_conn_t *conn)
                 break;
         }
 
-        if (conn->ibc_cmid->qp != NULL)
-                rdma_destroy_qp(conn->ibc_cmid);
+        if (cmid->qp != NULL)
+                rdma_destroy_qp(cmid);
 
         if (conn->ibc_cq != NULL) {
                 rc = ib_destroy_cq(conn->ibc_cq);
@@ -909,7 +909,7 @@ kiblnd_destroy_conn (kib_conn_t *conn)
 
                         LASSERT (rx->rx_nob >= 0); /* not posted */
 
-                        kiblnd_dma_unmap_single(conn->ibc_cmid->device,
+                        kiblnd_dma_unmap_single(cmid->device,
                                                 KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
                                                                   rx->rx_msgaddr),
                                                 IBLND_MSG_SIZE, DMA_FROM_DEVICE);
index f699983..74ae887 100644 (file)
@@ -614,7 +614,7 @@ kiblnd_rd_size (kib_rdma_desc_t *rd)
 }
 #endif
 
-#if (IBLND_OFED_VERSION == 1020) || (IBLND_OFED_VERSION == 1025)
+#ifdef HAVE_OFED_IB_DMA_MAP
 
 static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
                                           void *msg, size_t size,
@@ -666,7 +666,7 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM(e)            ((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e)        ((e)->param.conn.private_data_len)
 
-#elif (IBLND_OFED_VERSION == 1010)
+#else
 
 static inline dma_addr_t kiblnd_dma_map_single(struct ib_device *dev,
                                                void *msg, size_t size,
index 867c67f..7bef169 100644 (file)
@@ -1090,7 +1090,7 @@ kiblnd_tx_complete (kib_tx_t *tx, int status)
         if (failed) {
                 if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
                         CDEBUG(D_NETERROR, "Tx -> %s cookie "LPX64
-                               "sending %d waiting %d: failed %d\n",
+                               " sending %d waiting %d: failed %d\n",
                                libcfs_nid2str(conn->ibc_peer->ibp_nid),
                                tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
                                status);
@@ -3152,12 +3152,28 @@ kiblnd_scheduler(void *arg)
                         if (rc == 0) {
                                 rc = ib_req_notify_cq(conn->ibc_cq,
                                                       IB_CQ_NEXT_COMP);
-                                LASSERT (rc >= 0);
+                                if (rc < 0) {
+                                        CWARN("%s: ib_req_notify_cq failed: %d, "
+                                              "closing connection\n",
+                                              libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+                                        kiblnd_close_conn(conn, -EIO);
+                                        kiblnd_conn_decref(conn);
+                                        spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
+                                        continue;
+                                }
 
                                 rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
                         }
 
-                        LASSERT (rc >= 0);
+                        if (rc < 0) {
+                                CWARN("%s: ib_poll_cq failed: %d, "
+                                      "closing connection\n",
+                                      libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+                                kiblnd_close_conn(conn, -EIO);
+                                kiblnd_conn_decref(conn);
+                                spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
+                                continue;
+                        }
 
                         spin_lock_irqsave(&kiblnd_data.kib_sched_lock,
                                           flags);
index 707b1f1..dff7e7c 100644 (file)
@@ -255,7 +255,7 @@ static cfs_sysctl_table_t kiblnd_ctl_table[] = {
                 .procname = "concurrent_sends",
                 .data     = &concurrent_sends,
                 .maxlen   = sizeof(int),
-                .mode     = 0644,
+                .mode     = 0444,
                 .proc_handler = &proc_dointvec
         },
         {
@@ -355,8 +355,14 @@ kiblnd_tunables_init (void)
 
         if (*kiblnd_tunables.kib_concurrent_sends > IBLND_RX_MSGS)
                 *kiblnd_tunables.kib_concurrent_sends = IBLND_RX_MSGS;
-        if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE)
-                *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE;
+        if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE / 2)
+                *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE / 2;
+
+        if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE) {
+                CWARN("Concurrent sends %d is lower than message queue size: %d, "
+                      "performance may drop slightly.\n",
+                      *kiblnd_tunables.kib_concurrent_sends, IBLND_MSG_QUEUE_SIZE);
+        }
 
         return 0;
 }
index fd916ba..b29eff1 100644 (file)
@@ -1060,6 +1060,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         }
 
         memset (conn, 0, sizeof (*conn));
+
         conn->ksnc_peer = NULL;
         conn->ksnc_route = NULL;
         conn->ksnc_sock = sock;
@@ -1506,6 +1507,41 @@ ksocknal_peer_failed (ksock_peer_t *peer)
 }
 
 void
+ksocknal_finalize_zcreq(ksock_conn_t *conn)
+{
+        ksock_peer_t     *peer = conn->ksnc_peer;
+        ksock_tx_t       *tx;
+        ksock_tx_t       *tmp;
+        CFS_LIST_HEAD    (zlist);
+
+        /* NB safe to finalize TXs because closing of socket will
+         * abort all buffered data */
+        LASSERT (conn->ksnc_sock == NULL);
+
+        cfs_spin_lock(&peer->ksnp_lock);
+
+        list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
+                if (tx->tx_conn != conn)
+                        continue;
+
+                LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0);
+
+                tx->tx_msg.ksm_zc_req_cookie = 0;
+                list_del(&tx->tx_zc_list);
+                list_add(&tx->tx_zc_list, &zlist);
+        }
+
+        cfs_spin_unlock(&peer->ksnp_lock);
+
+        while (!list_empty(&zlist)) {
+                tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
+
+                list_del(&tx->tx_zc_list);
+                ksocknal_tx_decref(tx);
+        }
+}
+
+void
 ksocknal_terminate_conn (ksock_conn_t *conn)
 {
         /* This gets called by the reaper (guaranteed thread context) to
@@ -1515,10 +1551,6 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
         ksock_peer_t     *peer = conn->ksnc_peer;
         ksock_sched_t    *sched = conn->ksnc_scheduler;
         int               failed = 0;
-        struct list_head *tmp;
-        struct list_head *nxt;
-        ksock_tx_t       *tx;
-        LIST_HEAD        (zlist);
 
         LASSERT(conn->ksnc_closing);
 
@@ -1541,30 +1573,6 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
 
         cfs_spin_unlock_bh (&sched->kss_lock);
 
-        cfs_spin_lock(&peer->ksnp_lock);
-
-        list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) {
-                tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
-
-                if (tx->tx_conn != conn)
-                        continue;
-
-                LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0);
-
-                tx->tx_msg.ksm_zc_req_cookie = 0;
-                list_del(&tx->tx_zc_list);
-                list_add(&tx->tx_zc_list, &zlist);
-        }
-
-        cfs_spin_unlock(&peer->ksnp_lock);
-
-        list_for_each_safe(tmp, nxt, &zlist) {
-                tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
-
-                list_del(&tx->tx_zc_list);
-                ksocknal_tx_decref(tx);
-        }
-
         /* serialise with callbacks */
         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
 
index 43d2fcd..7c5cf9f 100644 (file)
@@ -75,6 +75,13 @@ typedef struct                                  /* per scheduler state */
         struct list_head  kss_zombie_noop_txs;  /* zombie noop tx list */
         cfs_waitq_t       kss_waitq;            /* where scheduler sleeps */
         int               kss_nconns;           /* # connections assigned to this scheduler */
+#if !SOCKNAL_SINGLE_FRAG_RX
+        struct page      *kss_rx_scratch_pgs[LNET_MAX_IOV];
+#endif
+#if !SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_SINGLE_FRAG_RX
+        struct iovec      kss_scratch_iov[LNET_MAX_IOV];
+#endif
+
 } ksock_sched_t;
 
 typedef struct
@@ -113,6 +120,8 @@ typedef struct
         int              *ksnd_enable_csum;     /* enable check sum */
         int              *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
         unsigned int     *ksnd_zc_min_frag;     /* minimum zero copy frag size */
+        int              *ksnd_zc_recv;         /* enable ZC receive (for Chelsio TOE) */
+        int              *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
 #ifdef CPU_AFFINITY
         int              *ksnd_irq_affinity;    /* enable IRQ affinity? */
 #endif
@@ -210,6 +219,7 @@ typedef struct                                  /* transmit packet */
         lnet_kiov_t            *tx_kiov;        /* packet page frags */
         struct ksock_conn      *tx_conn;        /* owning conn */
         lnet_msg_t             *tx_lnetmsg;     /* lnet message for lnet_finalize() */
+        cfs_time_t              tx_deadline;    /* when (in jiffies) tx times out */
         ksock_msg_t             tx_msg;         /* socklnd message buffer */
         int                     tx_desc_size;   /* size of this descriptor */
         union {
@@ -293,13 +303,6 @@ typedef struct ksock_conn
         cfs_atomic_t        ksnc_tx_nob;        /* # bytes queued */
         int                 ksnc_tx_ready;      /* write space */
         int                 ksnc_tx_scheduled;  /* being progressed */
-
-#if !SOCKNAL_SINGLE_FRAG_RX
-        struct iovec        ksnc_rx_scratch_iov[LNET_MAX_IOV];
-#endif
-#if !SOCKNAL_SINGLE_FRAG_TX
-        struct iovec        ksnc_tx_scratch_iov[LNET_MAX_IOV];
-#endif
 } ksock_conn_t;
 
 typedef struct ksock_route
@@ -401,6 +404,7 @@ ksocknal_conn_addref (ksock_conn_t *conn)
 }
 
 extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
+extern void ksocknal_finalize_zcreq(ksock_conn_t *conn);
 
 static inline void
 ksocknal_conn_decref (ksock_conn_t *conn)
@@ -434,6 +438,7 @@ ksocknal_connsock_decref (ksock_conn_t *conn)
                 LASSERT (conn->ksnc_closing);
                 libcfs_sock_release(conn->ksnc_sock);
                 conn->ksnc_sock = NULL;
+                ksocknal_finalize_zcreq(conn);
         }
 }
 
index 6fe8111..4c3f704 100644 (file)
@@ -444,6 +444,10 @@ ksocknal_check_zc_req(ksock_tx_t *tx)
 
         cfs_spin_lock(&peer->ksnp_lock);
 
+        /* ZC_REQ is going to be pinned to the peer */
+        tx->tx_deadline =
+                cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
         LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0);
         tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++;
         list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
@@ -738,10 +742,9 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         tx->tx_conn = conn;
         ksocknal_conn_addref(conn); /* +1 ref for tx */
 
-        /* 
-         * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take
-         * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be
-         * put in spinlock. 
+        /*
+         * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
+         * but they're used inside spinlocks a lot.
          */
         bufnob = libcfs_sock_wmem_queued(conn->ksnc_sock);
         cfs_spin_lock_bh (&sched->kss_lock);
@@ -961,6 +964,10 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
 
         if (peer->ksnp_accepting > 0 ||
             ksocknal_find_connecting_route_locked (peer) != NULL) {
+                /* the message is going to be pinned to the peer */
+                tx->tx_deadline =
+                        cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+                
                 /* Queue the message until a connection is established */
                 list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
                 cfs_write_unlock_bh (g_lock);
@@ -1291,6 +1298,16 @@ ksocknal_process_receive (ksock_conn_t *conn)
                         __swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie);
                 }
 
+                if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
+                    conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
+                        CERROR("%s: Unknown message type: %x\n",
+                               libcfs_id2str(conn->ksnc_peer->ksnp_id),
+                               conn->ksnc_msg.ksm_type);
+                        ksocknal_new_packet(conn, 0);
+                        ksocknal_close_conn_and_siblings(conn, -EPROTO);
+                        return (-EPROTO);
+                }
+
                 if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
                     conn->ksnc_msg.ksm_csum != 0 &&     /* has checksum */
                     conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
@@ -1322,7 +1339,6 @@ ksocknal_process_receive (ksock_conn_t *conn)
                         ksocknal_new_packet (conn, 0);
                         return 0;       /* NOOP is done and just return */
                 }
-                LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET);
 
                 conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
                 conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t);
@@ -2615,6 +2631,31 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
         return (NULL);
 }
 
+static inline void
+ksocknal_flush_stale_txs(ksock_peer_t *peer)
+{
+        ksock_tx_t        *tx;
+        CFS_LIST_HEAD      (stale_txs);
+        
+        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+
+        while (!list_empty (&peer->ksnp_tx_queue)) {
+                tx = list_entry (peer->ksnp_tx_queue.next,
+                                 ksock_tx_t, tx_list);
+
+                if (!cfs_time_aftereq(cfs_time_current(),
+                                      tx->tx_deadline))
+                        break;
+                
+                list_del (&tx->tx_list);
+                list_add_tail (&tx->tx_list, &stale_txs);
+        }
+
+        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+
+        ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
+}
+
 void
 ksocknal_check_peer_timeouts (int idx)
 {
@@ -2644,8 +2685,50 @@ ksocknal_check_peer_timeouts (int idx)
                         ksocknal_conn_decref(conn);
                         goto again;
                 }
+
+                /* we can't process stale txs right here because we're
+                 * holding only shared lock */
+                if (!list_empty (&peer->ksnp_tx_queue)) {
+                        ksock_tx_t *tx = list_entry (peer->ksnp_tx_queue.next,
+                                                     ksock_tx_t, tx_list);
+
+                        if (cfs_time_aftereq(cfs_time_current(),
+                                             tx->tx_deadline)) {
+
+                                ksocknal_peer_addref(peer);
+                                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+                                
+                                ksocknal_flush_stale_txs(peer);
+
+                                ksocknal_peer_decref(peer);
+                                goto again;
+                        }
+                }
         }
 
+        /* print out warnings about stale ZC_REQs */
+        list_for_each_entry(peer, peers, ksnp_list) {
+                ksock_tx_t *tx;
+                int         n = 0;
+                
+                list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
+                        if (!cfs_time_aftereq(cfs_time_current(),
+                                              tx->tx_deadline))
+                                break;
+                        n++;
+                }
+
+                if (n != 0) {
+                        tx = list_entry (peer->ksnp_zc_req_list.next,
+                                         ksock_tx_t, tx_zc_list);
+                        CWARN("Stale ZC_REQs for peer %s detected: %d; the "
+                              "oldest (%p) timed out %ld secs ago\n",
+                              libcfs_nid2str(peer->ksnp_id.nid), n, tx,
+                              cfs_duration_sec(cfs_time_current() -
+                                               tx->tx_deadline));
+                }
+        }
+        
         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
 }
 
index fbb2a5b..70e4294 100644 (file)
@@ -215,7 +215,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         struct iovec   *scratchiov = &scratch;
         unsigned int    niov = 1;
 #else
-        struct iovec   *scratchiov = conn->ksnc_tx_scratch_iov;
+        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int    niov = tx->tx_niov;
 #endif
         struct msghdr msg = {
@@ -260,7 +260,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
         struct iovec *scratchiov = &scratch;
         unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = tx->tx_nkiov;
 #endif
         struct msghdr msg = {
@@ -302,7 +302,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         struct iovec *scratchiov = &scratch;
         unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = conn->ksnc_rx_niov;
 #endif
         struct iovec *iov = conn->ksnc_rx_iov;
@@ -342,7 +342,7 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
         struct iovec *scratchiov = &scratch;
         unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = conn->ksnc_rx_nkiov;
 #endif
         lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
@@ -544,7 +544,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         struct iovec   *scratchiov = &scratch; 
         unsigned int    niov = 1;
 #else 
-        struct iovec   *scratchiov = conn->ksnc_tx_scratch_iov; 
+        struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; 
         unsigned int    niov = tx->tx_niov;
 #endif
         struct socket *sock = conn->ksnc_sock;
@@ -600,7 +600,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
         struct iovec *scratchiov = &scratch; 
         unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; 
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = tx->tx_nkiov;
 #endif
         struct socket *sock = conn->ksnc_sock;
@@ -738,7 +738,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         struct iovec *scratchiov = &scratch; 
         unsigned int  niov = 1;
 #else 
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; 
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = conn->ksnc_rx_niov;
 #endif
         struct iovec *iov = conn->ksnc_rx_iov;
@@ -792,7 +792,7 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
         struct iovec *scratchiov = &scratch; 
         unsigned int  niov = 1;
 #else 
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; 
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = conn->ksnc_rx_nkiov;
 #endif
         lnet_kiov_t    *kiov = conn->ksnc_rx_kiov;
index 800d4f5..5b0a9e9 100644 (file)
@@ -60,7 +60,9 @@ enum {
         SOCKLND_KEEPALIVE_INTVL,
         SOCKLND_BACKOFF_INIT,
         SOCKLND_BACKOFF_MAX,
-        SOCKLND_PROTOCOL
+        SOCKLND_PROTOCOL,
+        SOCKLND_ZERO_COPY_RECV,
+        SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS
 };
 #else
 
@@ -84,6 +86,8 @@ enum {
 #define SOCKLND_BACKOFF_INIT    CTL_UNNUMBERED
 #define SOCKLND_BACKOFF_MAX     CTL_UNNUMBERED
 #define SOCKLND_PROTOCOL        CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV  CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS CTL_UNNUMBERED
 #endif
 
 static cfs_sysctl_table_t ksocknal_ctl_table[] = {
@@ -160,6 +164,25 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
                 .strategy = &sysctl_intvec,
         },
         {
+                .ctl_name = SOCKLND_ZERO_COPY_RECV,
+                .procname = "zero_copy_recv",
+                .data     = &ksocknal_tunables.ksnd_zc_recv,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+
+        {
+                .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
+                .procname = "zero_copy_recv_min_nfrags",
+                .data     = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
                 .ctl_name = SOCKLND_TYPED,
                 .procname = "typed",
                 .data     = &ksocknal_tunables.ksnd_typed_conns,
@@ -292,6 +315,11 @@ cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
 int
 ksocknal_lib_tunables_init ()
 {
+        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+
         ksocknal_tunables.ksnd_sysctl =
                 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
 
@@ -451,7 +479,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
                 struct iovec   *scratchiov = &scratch;
                 unsigned int    niov = 1;
 #else
-                struct iovec   *scratchiov = conn->ksnc_tx_scratch_iov;
+                struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                 unsigned int    niov = tx->tx_niov;
 #endif
                 struct msghdr msg = {
@@ -524,7 +552,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 #ifdef CONFIG_HIGHMEM
 #warning "XXX risk of kmap deadlock on multiple frags..."
 #endif
-                struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+                struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                 unsigned int  niov = tx->tx_nkiov;
 #endif
                 struct msghdr msg = {
@@ -585,7 +613,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         struct iovec *scratchiov = &scratch;
         unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = conn->ksnc_rx_niov;
 #endif
         struct iovec *iov = conn->ksnc_rx_iov;
@@ -645,26 +673,72 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         return rc;
 }
 
+static void
+ksocknal_lib_kiov_vunmap(void *addr)
+{
+        if (addr == NULL)       /* nothing to unmap */
+                return;
+        /* Tear down the kernel virtual mapping that starts at 'addr' */
+        vunmap(addr);
+}
+
+static void *
+ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
+                       struct iovec *iov, struct page **pages)
+{
+        void             *addr;   /* base address returned by vmap() */
+        int               nob;    /* sum of kiov_len over all fragments */
+        int               i;
+        /* Map 'kiov' into a single iovec; NULL => caller kmap()s instead */
+        if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
+                return NULL;
+
+        LASSERT (niov <= LNET_MAX_IOV);
+        /* Skip runs shorter than 2 fragments or than the tunable minimum */
+        if (niov < 2 ||
+            niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
+                return NULL;
+        /* Only frag 0 may start mid-page; only the last may end early */
+        for (nob = i = 0; i < niov; i++) {
+                if ((kiov[i].kiov_offset != 0 && i > 0) ||
+                    (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1))
+                        return NULL;
+
+                pages[i] = kiov[i].kiov_page;
+                nob += kiov[i].kiov_len;
+        }
+
+        addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
+        if (addr == NULL)
+                return NULL;
+        /* One iovec covers all 'nob' bytes, offset into the first page */
+        iov->iov_base = addr + kiov[0].kiov_offset;
+        iov->iov_len = nob;
+        /* Hand back the mapping base (not iov_base) for the later vunmap */
+        return addr;
+}
+
 int
 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 {
 #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
-        struct iovec  scratch;
-        struct iovec *scratchiov = &scratch;
-        unsigned int  niov = 1;
+        struct iovec   scratch;
+        struct iovec  *scratchiov = &scratch;
+        struct page  **pages      = NULL;
+        unsigned int   niov       = 1;
 #else
 #ifdef CONFIG_HIGHMEM
 #warning "XXX risk of kmap deadlock on multiple frags..."
 #endif
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
-        unsigned int  niov = conn->ksnc_rx_nkiov;
+        struct iovec  *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+        struct page  **pages      = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+        unsigned int   niov       = conn->ksnc_rx_nkiov;
 #endif
         lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
         struct msghdr msg = {
                 .msg_name       = NULL,
                 .msg_namelen    = 0,
                 .msg_iov        = scratchiov,
-                .msg_iovlen     = niov,
                 .msg_control    = NULL,
                 .msg_controllen = 0,
                 .msg_flags      = 0
@@ -674,15 +748,25 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
         int          i;
         int          rc;
         void        *base;
+        void        *addr;
         int          sum;
         int          fragnob;
 
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-        for (nob = i = 0; i < niov; i++) {
-                scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
-                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+        if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+                nob = scratchiov[0].iov_len;
+                msg.msg_iovlen = 1;
+
+        } else {
+                for (nob = i = 0; i < niov; i++) {
+                        nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+                                                 kiov[i].kiov_offset;
+                }
+                msg.msg_iovlen = niov;
         }
+
         LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
         set_fs (KERNEL_DS);
@@ -709,8 +793,13 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
                         kunmap(kiov[i].kiov_page);
                 }
         }
-        for (i = 0; i < niov; i++)
-                kunmap(kiov[i].kiov_page);
+
+        if (addr != NULL) {
+                ksocknal_lib_kiov_vunmap(addr);
+        } else {
+                for (i = 0; i < niov; i++)
+                        kunmap(kiov[i].kiov_page);
+        }
 
         return (rc);
 }
index a8cba44..32ffa3c 100644 (file)
@@ -117,6 +117,14 @@ static unsigned int zc_min_frag = (2<<10);
 CFS_MODULE_PARM(zc_min_frag, "i", int, 0644,
                 "minimum fragment to zero copy");
 
+static unsigned int zc_recv = 0;
+CFS_MODULE_PARM(zc_recv, "i", int, 0644,
+                "enable ZC recv for Chelsio driver");
+
+static unsigned int zc_recv_min_nfrags = 16;
+CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0644,
+                "minimum # of fragments to enable ZC recv");
+
 #ifdef SOCKNAL_BACKOFF
 static int backoff_init = 3;
 CFS_MODULE_PARM(backoff_init, "i", int, 0644,
@@ -152,6 +160,8 @@ ksock_tunables_t ksocknal_tunables = {
         .ksnd_enable_csum     = &enable_csum,
         .ksnd_inject_csum_error = &inject_csum_error,
         .ksnd_zc_min_frag     = &zc_min_frag,
+        .ksnd_zc_recv         = &zc_recv,
+        .ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags,
 #ifdef CPU_AFFINITY
         .ksnd_irq_affinity    = &enable_irq_affinity,
 #endif
index 8e15389..ce47a6c 100644 (file)
@@ -191,7 +191,7 @@ kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
 }
 
 void
-kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, 
+kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
                 lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
 {
         /* CAVEAT EMPTOR! all message fields not set here should have been
@@ -260,7 +260,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
                     msg_version != IBNAL_MSG_VERSION)
                         return -EPROTO;
         } else if (msg_version != expected_version) {
-                CERROR("Bad version: %x(%x expected)\n", 
+                CERROR("Bad version: %x(%x expected)\n",
                        msg_version, expected_version);
                 return -EPROTO;
         }
@@ -286,7 +286,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
                 return -EPROTO;
         }
         msg->ibm_cksum = msg_cksum;
-        
+
         if (flip) {
                 /* leave magic unflipped as a clue to peer endianness */
                 msg->ibm_version = msg_version;
@@ -299,7 +299,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
                 __swab64s(&msg->ibm_dststamp);
                 __swab64s(&msg->ibm_seq);
         }
-        
+
         if (msg->ibm_srcnid == LNET_NID_ANY) {
                 CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
                 return -EPROTO;
@@ -309,7 +309,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
         default:
                 CERROR("Unknown message type %x\n", msg->ibm_type);
                 return -EPROTO;
-                
+
         case IBNAL_MSG_NOOP:
                 break;
 
@@ -346,14 +346,14 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
                         __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
                         __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
                 }
-                
+
                 n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
                 if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
-                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", 
+                        CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
                                n, IBNAL_MAX_RDMA_FRAGS);
                         return -EPROTO;
                 }
-                
+
                 if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
                         CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
                                (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
@@ -382,7 +382,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
                         __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
                         __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                 }
-#else                
+#else
                 if (flip) {
                         __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
                         __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
@@ -390,17 +390,17 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
 
                 n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
                 if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
-                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", 
+                        CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
                                n, IBNAL_MAX_RDMA_FRAGS);
                         return -EPROTO;
                 }
-                
+
                 if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
                         CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
                                (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
                         return -EPROTO;
                 }
-                
+
                 if (flip)
                         for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
                                 __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
@@ -448,25 +448,25 @@ kibnal_start_listener (lnet_ni_t *ni)
 
         LASSERT (kibnal_data.kib_listen_handle == NULL);
 
-        kibnal_data.kib_listen_handle = 
+        kibnal_data.kib_listen_handle =
                 cm_create_cep(cm_cep_transp_rc);
         if (kibnal_data.kib_listen_handle == NULL) {
                 CERROR ("Can't create listen CEP\n");
                 return -ENOMEM;
         }
 
-        CDEBUG(D_NET, "Created CEP %p for listening\n", 
+        CDEBUG(D_NET, "Created CEP %p for listening\n",
                kibnal_data.kib_listen_handle);
 
         memset(&info, 0, sizeof(info));
-        info.listen_addr.end_pt.sid = 
+        info.listen_addr.end_pt.sid =
                 (__u64)(*kibnal_tunables.kib_service_number);
 
         cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
                          kibnal_listen_callback, NULL);
         if (cmrc == cm_stat_success)
                 return 0;
-        
+
         CERROR ("cm_listen error: %d\n", cmrc);
 
         cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
@@ -482,13 +482,13 @@ kibnal_stop_listener(lnet_ni_t *ni)
         cm_return_t      cmrc;
 
         LASSERT (kibnal_data.kib_listen_handle != NULL);
-        
+
         cmrc = cm_cancel(kibnal_data.kib_listen_handle);
         if (cmrc != cm_stat_success)
                 CERROR ("Error %d stopping listener\n", cmrc);
 
         cfs_pause(cfs_time_seconds(1)/10);   /* ensure no more callbacks */
-        
+
         cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
         if (cmrc != vv_return_ok)
                 CERROR ("Error %d destroying CEP\n", cmrc);
@@ -536,18 +536,18 @@ kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
                 /* npeers only grows with the global lock held */
                 atomic_inc(&kibnal_data.kib_npeers);
         }
-        
+
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
 
         if (rc != 0) {
                 CERROR("Can't create peer: %s\n", 
-                       (rc == -ESHUTDOWN) ? "shutting down" : 
+                       (rc == -ESHUTDOWN) ? "shutting down" :
                        "too many peers");
                 LIBCFS_FREE(peer, sizeof(*peer));
         } else {
                 *peerp = peer;
         }
-        
+
         return rc;
 }
 
@@ -561,7 +561,7 @@ kibnal_destroy_peer (kib_peer_t *peer)
         LASSERT (peer->ibp_accepting == 0);
         LASSERT (list_empty (&peer->ibp_conns));
         LASSERT (list_empty (&peer->ibp_tx_queue));
-        
+
         LIBCFS_FREE (peer, sizeof (*peer));
 
         /* NB a peer's connections keep a reference on their peer until
@@ -660,7 +660,7 @@ kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip)
 
         CDEBUG(D_NET, "%s at %u.%u.%u.%u\n",
                libcfs_nid2str(nid), HIPQUAD(ip));
-        
+
         if (nid == LNET_NID_ANY)
                 return (-EINVAL);
 
@@ -686,7 +686,7 @@ kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip)
 
         peer->ibp_ip = ip;
         peer->ibp_persistence++;
-        
+
         write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
         return (0);
 }
@@ -831,16 +831,16 @@ kibnal_debug_conn (kib_conn_t *conn)
 {
         struct list_head *tmp;
         int               i;
-        
+
         spin_lock(&conn->ibc_lock);
-        
-        CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", 
-               atomic_read(&conn->ibc_refcount), conn, 
+
+        CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n",
+               atomic_read(&conn->ibc_refcount), conn,
                libcfs_nid2str(conn->ibc_peer->ibp_nid));
         CDEBUG(D_CONSOLE, "   txseq "LPD64" rxseq "LPD64" state %d \n",
                conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state);
         CDEBUG(D_CONSOLE, "   nposted %d cred %d o_cred %d r_cred %d\n",
-               conn->ibc_nsends_posted, conn->ibc_credits, 
+               conn->ibc_nsends_posted, conn->ibc_credits,
                conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
         CDEBUG(D_CONSOLE, "   disc %d comms_err %d\n",
                conn->ibc_disconnect, conn->ibc_comms_error);
@@ -848,7 +848,7 @@ kibnal_debug_conn (kib_conn_t *conn)
         CDEBUG(D_CONSOLE, "   early_rxs:\n");
         list_for_each(tmp, &conn->ibc_early_rxs)
                 kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
-        
+
         CDEBUG(D_CONSOLE, "   tx_queue_nocred:\n");
         list_for_each(tmp, &conn->ibc_tx_queue_nocred)
                 kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
@@ -860,11 +860,11 @@ kibnal_debug_conn (kib_conn_t *conn)
         CDEBUG(D_CONSOLE, "   tx_queue:\n");
         list_for_each(tmp, &conn->ibc_tx_queue)
                 kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-        
+
         CDEBUG(D_CONSOLE, "   active_txs:\n");
         list_for_each(tmp, &conn->ibc_active_txs)
                 kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-        
+
         CDEBUG(D_CONSOLE, "   rxs:\n");
         for (i = 0; i < IBNAL_RX_MSGS; i++)
                 kibnal_debug_rx(&conn->ibc_rxs[i]);
@@ -876,20 +876,20 @@ int
 kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
 {
         static vv_qp_attr_t attr;
-        
+
         kib_connvars_t   *cv = conn->ibc_connvars;
         vv_return_t       vvrc;
-        
+
         /* Only called by connd => static OK */
         LASSERT (!in_interrupt());
         LASSERT (current == kibnal_data.kib_connd);
 
         memset(&attr, 0, sizeof(attr));
-        
+
         switch (new_state) {
         default:
                 LBUG();
-                
+
         case vv_qp_state_init: {
                 struct vv_qp_modify_init_st *init = &attr.modify.params.init;
 
@@ -899,7 +899,7 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 init->access_control = vv_acc_r_mem_read |
                                        vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */
 
-                attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | 
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX |
                                               VV_QP_AT_PHY_PORT_NUM |
                                               VV_QP_AT_ACCESS_CON_F;
                 break;
@@ -928,9 +928,9 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
 
 
                 // XXX sdp sets VV_QP_AT_OP_F but no actual optional options
-                attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | 
+                attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC |
                                               VV_QP_AT_DEST_QP |
-                                              VV_QP_AT_R_PSN | 
+                                              VV_QP_AT_R_PSN |
                                               VV_QP_AT_MIN_RNR_NAK_T |
                                               VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM |
                                               VV_QP_AT_OP_F;
@@ -944,7 +944,7 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 rts->retry_num                = *kibnal_tunables.kib_retry_cnt;
                 rts->rnr_num                  = *kibnal_tunables.kib_rnr_cnt;
                 rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD;
-                
+
                 attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN |
                                               VV_QP_AT_L_ACK_T |
                                               VV_QP_AT_RETRY_NUM |
@@ -957,18 +957,18 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
                 attr.modify.vv_qp_attr_mask = 0;
                 break;
         }
-                
+
         attr.modify.qp_modify_into_state = new_state;
         attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE;
-        
+
         vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL);
         if (vvrc != vv_return_ok) {
-                CERROR("Can't modify qp -> %s state to %d: %d\n", 
+                CERROR("Can't modify qp -> %s state to %d: %d\n",
                        libcfs_nid2str(conn->ibc_peer->ibp_nid),
                        new_state, vvrc);
                 return -EIO;
         }
-        
+
         return 0;
 }
 
@@ -988,7 +988,7 @@ kibnal_create_conn (cm_cep_handle_t cep)
         /* Only the connd creates conns => single threaded */
         LASSERT(!in_interrupt());
         LASSERT(current == kibnal_data.kib_connd);
-        
+
         LIBCFS_ALLOC(conn, sizeof (*conn));
         if (conn == NULL) {
                 CERROR ("Can't allocate connection\n");
@@ -1006,7 +1006,7 @@ kibnal_create_conn (cm_cep_handle_t cep)
         INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
         INIT_LIST_HEAD (&conn->ibc_active_txs);
         spin_lock_init (&conn->ibc_lock);
-        
+
         atomic_inc (&kibnal_data.kib_nconns);
         /* well not really, but I call destroy() on failure, which decrements */
 
@@ -1040,7 +1040,7 @@ kibnal_create_conn (cm_cep_handle_t cep)
                 vv_r_key_t      r_key;
 
                 rx->rx_conn = conn;
-                rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
+                rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
                              page_offset);
 
                 vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
@@ -1069,7 +1069,7 @@ kibnal_create_conn (cm_cep_handle_t cep)
         reqattr.create.qp_type                    = vv_qp_type_r_conn;
         reqattr.create.cq_send_h                  = kibnal_data.kib_cq;
         reqattr.create.cq_receive_h               = kibnal_data.kib_cq;
-        reqattr.create.send_max_outstand_wr       = (1 + IBNAL_MAX_RDMA_FRAGS) * 
+        reqattr.create.send_max_outstand_wr       = (1 + IBNAL_MAX_RDMA_FRAGS) *
                                                     (*kibnal_tunables.kib_concurrent_sends);
         reqattr.create.receive_max_outstand_wr    = IBNAL_RX_MSGS;
         reqattr.create.max_scatgat_per_send_wr    = 1;
@@ -1089,13 +1089,13 @@ kibnal_create_conn (cm_cep_handle_t cep)
         conn->ibc_state = IBNAL_CONN_INIT_QP;
         conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num;
 
-        if (rspattr.create_return.receive_max_outstand_wr < 
+        if (rspattr.create_return.receive_max_outstand_wr <
             IBNAL_RX_MSGS ||
-            rspattr.create_return.send_max_outstand_wr < 
+            rspattr.create_return.send_max_outstand_wr <
             (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) {
                 CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n",
-                       IBNAL_RX_MSGS, 
-                       (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                       IBNAL_RX_MSGS,
+                       (1 + IBNAL_MAX_RDMA_FRAGS) *
                        (*kibnal_tunables.kib_concurrent_sends),
                        rspattr.create_return.receive_max_outstand_wr,
                        rspattr.create_return.send_max_outstand_wr);
@@ -1108,7 +1108,7 @@ kibnal_create_conn (cm_cep_handle_t cep)
         /* 1 ref for caller */
         atomic_set (&conn->ibc_refcount, 1);
         return (conn);
-        
+
  failed:
         kibnal_destroy_conn (conn);
         return (NULL);
@@ -1122,7 +1122,7 @@ kibnal_destroy_conn (kib_conn_t *conn)
         /* Only the connd does this (i.e. single threaded) */
         LASSERT (!in_interrupt());
         LASSERT (current == kibnal_data.kib_connd);
-        
+
         CDEBUG (D_NET, "connection %p\n", conn);
 
         LASSERT (atomic_read (&conn->ibc_refcount) == 0);
@@ -1154,16 +1154,16 @@ kibnal_destroy_conn (kib_conn_t *conn)
                 if (vvrc != vv_return_ok)
                         CERROR("Can't destroy QP: %d\n", vvrc);
                 /* fall through */
-                
+
         case IBNAL_CONN_INIT_NOTHING:
                 break;
         }
 
-        if (conn->ibc_rx_pages != NULL) 
+        if (conn->ibc_rx_pages != NULL)
                 kibnal_free_pages(conn->ibc_rx_pages);
 
         if (conn->ibc_rxs != NULL)
-                LIBCFS_FREE(conn->ibc_rxs, 
+                LIBCFS_FREE(conn->ibc_rxs,
                             IBNAL_RX_MSGS * sizeof(kib_rx_t));
 
         if (conn->ibc_connvars != NULL)
@@ -1212,7 +1212,7 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
                 CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n",
                        libcfs_nid2str(peer->ibp_nid),
                        conn->ibc_incarnation, incarnation);
-                
+
                 count++;
                 kibnal_close_conn_locked (conn, -ESTALE);
         }
@@ -1262,7 +1262,7 @@ kibnal_close_matching_conns (lnet_nid_t nid)
         /* wildcards always succeed */
         if (nid == LNET_NID_ANY)
                 return (0);
-        
+
         return (count == 0 ? -ENOENT : 0);
 }
 
@@ -1335,11 +1335,11 @@ kibnal_free_pages (kib_pages_t *p)
 {
         int         npages = p->ibp_npages;
         int         i;
-        
+
         for (i = 0; i < npages; i++)
                 if (p->ibp_pages[i] != NULL)
                         __free_page(p->ibp_pages[i]);
-        
+
         LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
 }
 
@@ -1357,7 +1357,7 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
 
         memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
         p->ibp_npages = npages;
-        
+
         for (i = 0; i < npages; i++) {
                 p->ibp_pages[i] = alloc_page (GFP_KERNEL);
                 if (p->ibp_pages[i] == NULL) {
@@ -1372,15 +1372,15 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
 }
 
 int
-kibnal_alloc_tx_descs (void) 
+kibnal_alloc_tx_descs (void)
 {
         int    i;
-        
+
         LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
                       IBNAL_TX_MSGS() * sizeof(kib_tx_t));
         if (kibnal_data.kib_tx_descs == NULL)
                 return -ENOMEM;
-        
+
         memset(kibnal_data.kib_tx_descs, 0,
                IBNAL_TX_MSGS() * sizeof(kib_tx_t));
 
@@ -1393,20 +1393,20 @@ kibnal_alloc_tx_descs (void)
                 if (tx->tx_pages == NULL)
                         return -ENOMEM;
 #else
-                LIBCFS_ALLOC(tx->tx_wrq, 
-                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                LIBCFS_ALLOC(tx->tx_wrq,
+                             (1 + IBNAL_MAX_RDMA_FRAGS) *
                              sizeof(*tx->tx_wrq));
                 if (tx->tx_wrq == NULL)
                         return -ENOMEM;
-                
-                LIBCFS_ALLOC(tx->tx_gl, 
-                             (1 + IBNAL_MAX_RDMA_FRAGS) * 
+
+                LIBCFS_ALLOC(tx->tx_gl,
+                             (1 + IBNAL_MAX_RDMA_FRAGS) *
                              sizeof(*tx->tx_gl));
                 if (tx->tx_gl == NULL)
                         return -ENOMEM;
-                
-                LIBCFS_ALLOC(tx->tx_rd, 
-                             offsetof(kib_rdma_desc_t, 
+
+                LIBCFS_ALLOC(tx->tx_rd,
+                             offsetof(kib_rdma_desc_t,
                                       rd_frags[IBNAL_MAX_RDMA_FRAGS]));
                 if (tx->tx_rd == NULL)
                         return -ENOMEM;
@@ -1417,7 +1417,7 @@ kibnal_alloc_tx_descs (void)
 }
 
 void
-kibnal_free_tx_descs (void) 
+kibnal_free_tx_descs (void)
 {
         int    i;
 
@@ -1433,18 +1433,18 @@ kibnal_free_tx_descs (void)
                                     sizeof(*tx->tx_pages));
 #else
                 if (tx->tx_wrq != NULL)
-                        LIBCFS_FREE(tx->tx_wrq, 
-                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                        LIBCFS_FREE(tx->tx_wrq,
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) *
                                     sizeof(*tx->tx_wrq));
 
                 if (tx->tx_gl != NULL)
-                        LIBCFS_FREE(tx->tx_gl, 
-                                    (1 + IBNAL_MAX_RDMA_FRAGS) * 
+                        LIBCFS_FREE(tx->tx_gl,
+                                    (1 + IBNAL_MAX_RDMA_FRAGS) *
                                     sizeof(*tx->tx_gl));
 
                 if (tx->tx_rd != NULL)
-                        LIBCFS_FREE(tx->tx_rd, 
-                                    offsetof(kib_rdma_desc_t, 
+                        LIBCFS_FREE(tx->tx_rd,
+                                    offsetof(kib_rdma_desc_t,
                                              rd_frags[IBNAL_MAX_RDMA_FRAGS]));
 #endif
         }
@@ -1455,7 +1455,7 @@ kibnal_free_tx_descs (void)
 
 #if IBNAL_USE_FMR
 void
-kibnal_free_fmrs (int n) 
+kibnal_free_fmrs (int n)
 {
         int             i;
         vv_return_t     vvrc;
@@ -1494,7 +1494,7 @@ kibnal_setup_tx_descs (void)
         /* No fancy arithmetic when we do the buffer calculations */
         CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
 
-        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, 
+        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
                                 IBNAL_TX_MSG_PAGES(), 0);
         if (rc != 0)
                 return (rc);
@@ -1550,7 +1550,7 @@ kibnal_setup_tx_descs (void)
                         LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
                 }
         }
-        
+
         return (0);
 }
 
@@ -1562,7 +1562,7 @@ kibnal_shutdown (lnet_ni_t *ni)
 
         LASSERT (ni == kibnal_data.kib_ni);
         LASSERT (ni->ni_data == &kibnal_data);
-        
+
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
                atomic_read (&libcfs_kmemory));
 
@@ -1614,7 +1614,7 @@ kibnal_shutdown (lnet_ni_t *ni)
                                               kibnal_async_callback);
                 if (vvrc != vv_return_ok)
                         CERROR("vv_dell_async_event_cb error: %d\n", vvrc);
-                        
+
                 /* fall through */
 
         case IBNAL_INIT_HCA:
@@ -1649,7 +1649,7 @@ kibnal_shutdown (lnet_ni_t *ni)
                         cfs_pause(cfs_time_seconds(1));
                 }
                 /* fall through */
-                
+
         case IBNAL_INIT_NOTHING:
                 break;
         }
@@ -1658,7 +1658,7 @@ kibnal_shutdown (lnet_ni_t *ni)
 
         if (kibnal_data.kib_peers != NULL)
                 LIBCFS_FREE (kibnal_data.kib_peers,
-                             sizeof (struct list_head) * 
+                             sizeof (struct list_head) *
                              kibnal_data.kib_peer_hash_size);
 
         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
@@ -1873,18 +1873,18 @@ kibnal_startup (lnet_ni_t *ni)
 
                         /* Found a suitable port. Get its GUID and PKEY. */
                         tbl_count = 1;
-                        vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, 
+                        vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca,
                                                    port_num, &tbl_count,
                                                    &kibnal_data.kib_port_gid);
                         if (vvrc != vv_return_ok) {
                                 CERROR("vv_get_port_gid_tbl failed "
-                                       "for %s port %d: %d\n", 
+                                       "for %s port %d: %d\n",
                                        hca_name, port_num, vvrc);
                                 continue;
                         }
 
                         tbl_count = 1;
-                        vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, 
+                        vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca,
                                                          port_num, &tbl_count,
                                                          &kibnal_data.kib_port_pkey);
                         if (vvrc != vv_return_ok) {
@@ -1912,8 +1912,8 @@ kibnal_startup (lnet_ni_t *ni)
         }
 
         CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n",
-               hca_name, kibnal_data.kib_port, 
-               kibnal_data.kib_port_gid.scope.g.subnet, 
+               hca_name, kibnal_data.kib_port,
+               kibnal_data.kib_port_gid.scope.g.subnet,
                kibnal_data.kib_port_gid.scope.g.eui64);
 
         /*****************************************************/
@@ -1947,7 +1947,7 @@ kibnal_startup (lnet_ni_t *ni)
                 __u32 nentries;
 
                 vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
-                                    kibnal_cq_callback, 
+                                    kibnal_cq_callback,
                                     NULL, /* context */
                                     &kibnal_data.kib_cq, &nentries);
                 if (vvrc != 0) {
@@ -1959,13 +1959,13 @@ kibnal_startup (lnet_ni_t *ni)
                 kibnal_data.kib_init = IBNAL_INIT_CQ;
 
                 if (nentries < IBNAL_CQ_ENTRIES()) {
-                        CERROR ("CQ only has %d entries, need %d\n", 
+                        CERROR ("CQ only has %d entries, need %d\n",
                                 nentries, IBNAL_CQ_ENTRIES());
                         goto failed;
                 }
 
-                vvrc = vv_request_completion_notification(kibnal_data.kib_hca, 
-                                                          kibnal_data.kib_cq, 
+                vvrc = vv_request_completion_notification(kibnal_data.kib_hca,
+                                                          kibnal_data.kib_cq,
                                                           vv_next_solicit_unsolicit_event);
                 if (vvrc != 0) {
                         CERROR ("Failed to re-arm completion queue: %d\n", rc);
@@ -1987,7 +1987,7 @@ kibnal_startup (lnet_ni_t *ni)
 
  failed:
         CDEBUG(D_NET, "kibnal_startup failed\n");
-        kibnal_shutdown (ni);    
+        kibnal_shutdown (ni);
         return (-ENETDOWN);
 }
 
@@ -2005,9 +2005,9 @@ kibnal_module_init (void)
 
         vibnal_assert_wire_constants();
 
-        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) 
+        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
                   <= cm_REQ_priv_data_len);
-        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) 
+        CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
                   <= cm_REP_priv_data_len);
         CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE);
 #if !IBNAL_USE_FMR
index 8f016bc..0528b0e 100644 (file)
@@ -177,7 +177,7 @@ kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit)
         LASSERT (conn->ibc_state >= IBNAL_CONN_INIT);
         LASSERT (rx->rx_nob >= 0);              /* not posted */
 
-        CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", 
+        CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n",
                rx->rx_wrq.scatgat_list->length,
                rx->rx_wrq.scatgat_list->l_key,
                KIBNAL_SG2ADDR(rx->rx_wrq.scatgat_list->v_address));
@@ -211,10 +211,10 @@ kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit)
 
         spin_unlock(&conn->ibc_lock);
 
-        CERROR ("post rx -> %s failed %d\n", 
+        CERROR ("post rx -> %s failed %d\n",
                 libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc);
         rc = -EIO;
-        kibnal_close_conn(rx->rx_conn, rc);
+        kibnal_close_conn(conn, rc);
         /* No more posts for this rx; so lose its ref */
         kibnal_conn_decref(conn);
         return rc;
@@ -1756,7 +1756,7 @@ kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
         case IBNAL_MSG_PUT_REQ:
                 if (mlen == 0) {
                         lnet_finalize(ni, lntmsg, 0);
-                        kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0,
+                        kibnal_send_completion(conn, IBNAL_MSG_PUT_NAK, 0,
                                                rxmsg->ibm_u.putreq.ibprm_cookie);
                         break;
                 }
@@ -1786,7 +1786,7 @@ kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
                                libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
                         kibnal_tx_done(tx);
                         /* tell peer it's over */
-                        kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc,
+                        kibnal_send_completion(conn, IBNAL_MSG_PUT_NAK, rc,
                                                rxmsg->ibm_u.putreq.ibprm_cookie);
                         break;
                 }
@@ -1818,8 +1818,7 @@ kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
                         kibnal_reply(ni, rx, lntmsg);
                 } else {
                         /* GET didn't match anything */
-                        kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE,
-                                               -ENODATA,
+                        kibnal_send_completion(conn, IBNAL_MSG_GET_DONE, -ENODATA,
                                                rxmsg->ibm_u.get.ibgm_cookie);
                 }
                 break;
@@ -2494,7 +2493,7 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq)
                         write_unlock_irqrestore(g_lock, flags);
 
                         CWARN("Conn race %s\n",
-                              libcfs_nid2str(peer2->ibp_nid));
+                              libcfs_nid2str(rxmsg.ibm_srcnid));
 
                         kibnal_peer_decref(peer);
                         reason = IBNAL_REJECT_CONN_RACE;
@@ -2632,6 +2631,7 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq)
         if (conn != NULL) {
                 LASSERT (rc != 0);
                 kibnal_connreq_done(conn, 0, rc);
+                kibnal_conn_decref(conn);
         } else {
                 cm_destroy_cep(cep);
         }
@@ -3072,7 +3072,7 @@ kibnal_arp_done (kib_conn_t *conn)
                                        path->pkey, &cv->cv_pkey_index);
                 if (vvrc != vv_return_ok) {
                         CWARN("pkey2pkey_index failed for %s @ %u.%u.%u.%u: %d\n",
-                              libcfs_nid2str(peer->ibp_nid), 
+                              libcfs_nid2str(peer->ibp_nid),
                               HIPQUAD(peer->ibp_ip), vvrc);
                         goto failed;
                 }
@@ -3102,7 +3102,7 @@ kibnal_arp_done (kib_conn_t *conn)
                                          &path->slid);
                 if (vvrc != vv_return_ok) {
                         CWARN("port_num2base_lid failed for %s @ %u.%u.%u.%u: %d\n",
-                              libcfs_nid2str(peer->ibp_ip), 
+                              libcfs_nid2str(peer->ibp_ip),
                               HIPQUAD(peer->ibp_ip), vvrc);
                         goto failed;
                 }
index 2903ba5..bb2e9f6 100644 (file)
@@ -336,7 +336,7 @@ static cfs_sysctl_table_t kibnal_ctl_table[] = {
                 .procname = "concurrent_sends",
                 .data     = &concurrent_sends,
                 .maxlen   = sizeof(int),
-                .mode     = 0644,
+                .mode     = 0444,
                 .proc_handler = &proc_dointvec
         },
 #if IBNAL_USE_FMR
index 6adaa83..1f61363 100644 (file)
@@ -438,6 +438,7 @@ void libcfs_debug_dumplog_internal(void *arg)
                          cfs_time_current_sec(), (long)arg);
                 printk(KERN_ALERT "LustreError: dumping log to %s\n",
                        debug_file_name);
+
                 tracefile_dump_all_pages(debug_file_name);
                 libcfs_run_debug_log_upcall(debug_file_name);
         }
index 838e814..8918fea 100644 (file)
@@ -80,7 +80,7 @@ lnet_get_networks(void)
                                    "'ip2nets' but not both at once\n");
                 return NULL;
         }
-        
+
         if (*ip2nets != 0) {
                 rc = lnet_parse_ip2nets(&nets, ip2nets);
                 return (rc == 0) ? nets : NULL;
@@ -107,7 +107,7 @@ lnet_get_portals_compatibility(void)
         if (!strcmp(portals_compatibility, "strong")) {
                 return 2;
                 LCONSOLE_WARN("Starting in strong portals-compatible mode\n");
-        } 
+        }
 
         LCONSOLE_ERROR_MSG(0x102, "portals_compatibility=\"%s\" not supported\n",
                            portals_compatibility);
@@ -134,7 +134,7 @@ char *
 lnet_get_routes(void)
 {
         char *str = getenv("LNET_ROUTES");
-        
+
         return (str == NULL) ? "" : str;
 }
 
@@ -175,21 +175,21 @@ lnet_get_networks (void)
         str = default_networks;
         *str = 0;
         sep = "";
-                
+
         list_for_each (tmp, &the_lnet.ln_lnds) {
-                        lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
-                        
-                        nob = snprintf(str, len, "%s%s", sep,
-                                       libcfs_lnd2str(lnd->lnd_type));
-                        len -= nob;
-                        if (len < 0) {
-                                /* overflowed the string; leave it where it was */
-                                *str = 0;
-                                break;
-                        }
-                        
-                        str += nob;
-                        sep = ",";
+                lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
+
+                nob = snprintf(str, len, "%s%s", sep,
+                               libcfs_lnd2str(lnd->lnd_type));
+                len -= nob;
+                if (len < 0) {
+                        /* overflowed the string; leave it where it was */
+                        *str = 0;
+                        break;
+                }
+
+                str += nob;
+                sep = ",";
         }
 
         return default_networks;
@@ -332,7 +332,7 @@ void lnet_assert_wire_constants (void)
 }
 
 lnd_t *
-lnet_find_lnd_by_type (int type) 
+lnet_find_lnd_by_type (int type)
 {
         lnd_t              *lnd;
         struct list_head   *tmp;
@@ -344,7 +344,7 @@ lnet_find_lnd_by_type (int type)
                 if (lnd->lnd_type == type)
                         return lnd;
         }
-        
+
         return NULL;
 }
 
@@ -356,7 +356,7 @@ lnet_register_lnd (lnd_t *lnd)
         LASSERT (the_lnet.ln_init);
         LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
         LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
-        
+
         list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
         lnd->lnd_refcount = 0;
 
@@ -373,7 +373,7 @@ lnet_unregister_lnd (lnd_t *lnd)
         LASSERT (the_lnet.ln_init);
         LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
         LASSERT (lnd->lnd_refcount == 0);
-        
+
         list_del (&lnd->lnd_list);
         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
 
@@ -439,7 +439,7 @@ lnet_freelist_fini (lnet_freelist_t *fl)
         LASSERT (count == fl->fl_nobjs);
 
         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
-        memset (fl, 0, sizeof (fl));
+        memset (fl, 0, sizeof (*fl));
 }
 
 int
@@ -507,10 +507,10 @@ lnet_create_interface_cookie (void)
 }
 
 int
-lnet_setup_handle_hash (void) 
+lnet_setup_handle_hash (void)
 {
         int       i;
-        
+
         /* Arbitrary choice of hash table size */
 #ifdef __KERNEL__
         the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head);
@@ -521,12 +521,12 @@ lnet_setup_handle_hash (void)
                      the_lnet.ln_lh_hash_size * sizeof (struct list_head));
         if (the_lnet.ln_lh_hash_table == NULL)
                 return (-ENOMEM);
-        
+
         for (i = 0; i < the_lnet.ln_lh_hash_size; i++)
                 CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]);
 
         the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES;
-        
+
         return (0);
 }
 
@@ -535,13 +535,13 @@ lnet_cleanup_handle_hash (void)
 {
         if (the_lnet.ln_lh_hash_table == NULL)
                 return;
-        
+
         LIBCFS_FREE(the_lnet.ln_lh_hash_table,
                     the_lnet.ln_lh_hash_size * sizeof (struct list_head));
 }
 
 lnet_libhandle_t *
-lnet_lookup_cookie (__u64 cookie, int type) 
+lnet_lookup_cookie (__u64 cookie, int type)
 {
         /* ALWAYS called with LNET_LOCK held */
         struct list_head    *list;
@@ -550,23 +550,23 @@ lnet_lookup_cookie (__u64 cookie, int type)
 
         if ((cookie & (LNET_COOKIE_TYPES - 1)) != type)
                 return (NULL);
-        
+
         hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size;
         list = &the_lnet.ln_lh_hash_table[hash];
-        
+
         list_for_each (el, list) {
                 lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t,
                                                   lh_hash_chain);
-                
+
                 if (lh->lh_cookie == cookie)
                         return (lh);
         }
-        
+
         return (NULL);
 }
 
 void
-lnet_initialise_handle (lnet_libhandle_t *lh, int type) 
+lnet_initialise_handle (lnet_libhandle_t *lh, int type)
 {
         /* ALWAYS called with LNET_LOCK held */
         unsigned int    hash;
@@ -574,7 +574,7 @@ lnet_initialise_handle (lnet_libhandle_t *lh, int type)
         LASSERT (type >= 0 && type < LNET_COOKIE_TYPES);
         lh->lh_cookie = the_lnet.ln_next_object_cookie | type;
         the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES;
-        
+
         hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size;
         list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]);
 }
@@ -595,7 +595,7 @@ lnet_init_finalizers(void)
         the_lnet.ln_nfinalizers = num_online_cpus();
 
         LIBCFS_ALLOC(the_lnet.ln_finalizers,
-                     the_lnet.ln_nfinalizers * 
+                     the_lnet.ln_nfinalizers *
                      sizeof(*the_lnet.ln_finalizers));
         if (the_lnet.ln_finalizers == NULL) {
                 CERROR("Can't allocate ln_finalizers\n");
@@ -617,7 +617,7 @@ lnet_fini_finalizers(void)
 {
 #ifdef __KERNEL__
         int    i;
-        
+
         for (i = 0; i < the_lnet.ln_nfinalizers; i++)
                 LASSERT (the_lnet.ln_finalizers[i] == NULL);
 
@@ -639,7 +639,7 @@ void
 lnet_server_mode() {
         the_lnet.ln_server_mode_flag = 1;
 }
-#endif        
+#endif
 
 int
 lnet_prepare(lnet_pid_t requested_pid)
@@ -658,7 +658,7 @@ lnet_prepare(lnet_pid_t requested_pid)
 #else
         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
-                
+
                 if (cfs_curproc_uid())/* Only root can run user-space server */
                         return -EPERM;
                 the_lnet.ln_pid = requested_pid;
@@ -667,14 +667,14 @@ lnet_prepare(lnet_pid_t requested_pid)
 
                 /* My PID must be unique on this node and flag I'm userspace */
                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
-        }        
+        }
 #endif
 
         rc = lnet_descriptor_setup();
         if (rc != 0)
                 goto failed0;
 
-        memset(&the_lnet.ln_counters, 0, 
+        memset(&the_lnet.ln_counters, 0,
                sizeof(the_lnet.ln_counters));
 
         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs);
@@ -703,8 +703,8 @@ lnet_prepare(lnet_pid_t requested_pid)
                 goto failed2;
 
         the_lnet.ln_nportals = MAX_PORTALS;
-        LIBCFS_ALLOC(the_lnet.ln_portals, 
-                     the_lnet.ln_nportals * 
+        LIBCFS_ALLOC(the_lnet.ln_portals,
+                     the_lnet.ln_nportals *
                      sizeof(*the_lnet.ln_portals));
         if (the_lnet.ln_portals == NULL) {
                 rc = -ENOMEM;
@@ -718,7 +718,7 @@ lnet_prepare(lnet_pid_t requested_pid)
         }
 
         return 0;
-        
+
  failed3:
         lnet_fini_finalizers();
  failed2:
@@ -734,7 +734,7 @@ int
 lnet_unprepare (void)
 {
         int       idx;
-        
+
         /* NB no LNET_LOCK since this is the last reference.  All LND instances
          * have shut down already, so it is safe to unlink and free all
          * descriptors, even those that appear committed to a network op (eg MD
@@ -747,7 +747,7 @@ lnet_unprepare (void)
         LASSERT (list_empty(&the_lnet.ln_nis));
         LASSERT (list_empty(&the_lnet.ln_zombie_nis));
         LASSERT (the_lnet.ln_nzombie_nis == 0);
-               
+
         for (idx = 0; idx < the_lnet.ln_nportals; idx++) {
                 LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq));
 
@@ -816,7 +816,7 @@ lnet_net2ni_locked (__u32 net)
                         return ni;
                 }
         }
-        
+
         return NULL;
 }
 
@@ -824,7 +824,7 @@ int
 lnet_islocalnet (__u32 net)
 {
         lnet_ni_t        *ni;
-        
+
         LNET_LOCK();
         ni = lnet_net2ni_locked(net);
         if (ni != NULL)
@@ -848,7 +848,7 @@ lnet_nid2ni_locked (lnet_nid_t nid)
                         return ni;
                 }
         }
-        
+
         return NULL;
 }
 
@@ -856,7 +856,7 @@ int
 lnet_islocalnid (lnet_nid_t nid)
 {
         lnet_ni_t     *ni;
-        
+
         LNET_LOCK();
         ni = lnet_nid2ni_locked(nid);
         if (ni != NULL)
@@ -890,7 +890,7 @@ lnet_count_acceptor_nis (lnet_ni_t **first_ni)
                         count++;
                 }
         }
-        
+
         LNET_UNLOCK();
 
 #endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
@@ -1133,7 +1133,7 @@ lnet_startup_lndnis (void)
                         }
                         libcfs_setnet0alias(lnd->lnd_type);
                 }
-                
+
                 nicount++;
         }
 
@@ -1337,21 +1337,21 @@ LNetCtl(unsigned int cmd, void *arg)
 
         case IOC_LIBCFS_FAIL_NID:
                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-                
+
         case IOC_LIBCFS_ADD_ROUTE:
-                rc = lnet_add_route(data->ioc_net, data->ioc_count, 
+                rc = lnet_add_route(data->ioc_net, data->ioc_count,
                                     data->ioc_nid);
                 return (rc != 0) ? rc : lnet_check_routes();
-                
+
         case IOC_LIBCFS_DEL_ROUTE:
                 return lnet_del_route(data->ioc_net, data->ioc_nid);
 
         case IOC_LIBCFS_GET_ROUTE:
-                return lnet_get_route(data->ioc_count, 
-                                      &data->ioc_net, &data->ioc_count, 
+                return lnet_get_route(data->ioc_count,
+                                      &data->ioc_net, &data->ioc_count,
                                       &data->ioc_nid, &data->ioc_flags);
         case IOC_LIBCFS_NOTIFY_ROUTER:
-                return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, 
+                return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
                                    (time_t)data->ioc_u64[0]);
 
         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
@@ -1361,7 +1361,7 @@ LNetCtl(unsigned int cmd, void *arg)
                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
                 if (rc < 0 && rc != -EHOSTUNREACH)
                         return rc;
-                
+
                 data->ioc_u32[0] = rc;
                 return 0;
 
@@ -1399,12 +1399,12 @@ LNetCtl(unsigned int cmd, void *arg)
                         } else {
                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
                         }
-                        
+
                         lnet_ni_decref(ni);
                 }
                 return 0;
         }
-                
+
         default:
                 ni = lnet_net2ni(data->ioc_net);
                 if (ni == NULL)
@@ -1436,7 +1436,7 @@ LNetGetId(unsigned int index, lnet_process_id_t *id)
         list_for_each(tmp, &the_lnet.ln_nis) {
                 if (index-- != 0)
                         continue;
-                
+
                 ni = list_entry(tmp, lnet_ni_t, ni_list);
 
                 id->nid = ni->ni_nid;
@@ -1467,7 +1467,7 @@ lnet_ping_target_init(void)
         int               n;
         int               infosz;
         int               i;
-        
+
         for (n = 0; ; n++) {
                 rc = LNetGetId(n, &id);
                 if (rc == -ENOENT)
@@ -1493,7 +1493,7 @@ lnet_ping_target_init(void)
                 LASSERT (rc == 0);
                 the_lnet.ln_ping_info->pi_nid[i] = id.nid;
         }
-        
+
         /* We can have a tiny EQ since we only need to see the unlink event on
          * teardown, which by definition is the last one! */
         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
@@ -1734,7 +1734,7 @@ lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_i
         }
 
         if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) {
-                CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), 
+                CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
                        nob, (int)offsetof(lnet_ping_info_t, pi_nid[0]));
                 goto out_1;
         }
@@ -1743,7 +1743,7 @@ lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_i
                 n_ids = info->pi_nnids;
 
         if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) {
-                CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), 
+                CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
                        nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids]));
                 goto out_1;
         }
index a1d2de6..4ca84f3 100644 (file)
@@ -118,6 +118,8 @@ LNetEQFree(lnet_handle_eq_t eqh)
         }
 
         if (eq->eq_refcount != 0) {
+                CDEBUG(D_NET, "Event queue (%d) busy on destroy.\n",
+                       eq->eq_refcount);
                 LNET_UNLOCK();
                 return (-EBUSY);
         }
@@ -311,7 +313,7 @@ LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms,
                         gettimeofday(&then, NULL);
 
                         ts.tv_sec = then.tv_sec + timeout_ms/1000;
-                        ts.tv_nsec = then.tv_usec * 1000 + 
+                        ts.tv_nsec = then.tv_usec * 1000 +
                                      (timeout_ms%1000) * 1000000;
                         if (ts.tv_nsec >= 1000000000) {
                                 ts.tv_sec++;
index a7e14ff..39fa978 100644 (file)
@@ -222,11 +222,14 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
-        
+
         if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
             umd.length > LNET_MAX_IOV) /* too many fragments */
                 return -EINVAL;
 
+        if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0)
+                return -EINVAL;
+
         md = lnet_md_alloc(&umd);
         if (md == NULL)
                 return -ENOMEM;
@@ -268,11 +271,14 @@ LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
-        
+
         if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
             umd.length > LNET_MAX_IOV) /* too many fragments */
                 return -EINVAL;
 
+        if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0)
+                return -EINVAL;
+
         md = lnet_md_alloc(&umd);
         if (md == NULL)
                 return -ENOMEM;
index 66b3d84..90131a1 100644 (file)
 
 int
 LNetMEAttach(unsigned int portal,
-             lnet_process_id_t match_id, 
+             lnet_process_id_t match_id,
              __u64 match_bits, __u64 ignore_bits,
-             lnet_unlink_t unlink, lnet_ins_pos_t pos, 
+             lnet_unlink_t unlink, lnet_ins_pos_t pos,
              lnet_handle_me_t *handle)
 {
         lnet_me_t     *me;
 
         LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
-        
+
         if (portal >= the_lnet.ln_nportals)
                 return -EINVAL;
 
@@ -84,9 +84,9 @@ LNetMEAttach(unsigned int portal,
         return 0;
 }
 
-int 
-LNetMEInsert(lnet_handle_me_t current_meh, 
-             lnet_process_id_t match_id, 
+int
+LNetMEInsert(lnet_handle_me_t current_meh,
+             lnet_process_id_t match_id,
              __u64 match_bits, __u64 ignore_bits,
              lnet_unlink_t unlink, lnet_ins_pos_t pos,
              lnet_handle_me_t *handle)
@@ -94,9 +94,9 @@ LNetMEInsert(lnet_handle_me_t current_meh,
         lnet_me_t     *current_me;
         lnet_me_t     *new_me;
 
-        LASSERT (the_lnet.ln_init);        
+        LASSERT (the_lnet.ln_init);
         LASSERT (the_lnet.ln_refcount > 0);
-        
+
         new_me = lnet_me_alloc();
         if (new_me == NULL)
                 return -ENOMEM;
@@ -121,9 +121,9 @@ LNetMEInsert(lnet_handle_me_t current_meh,
         lnet_initialise_handle (&new_me->me_lh, LNET_COOKIE_TYPE_ME);
 
         if (pos == LNET_INS_AFTER)
-                list_add_tail(&new_me->me_list, &current_me->me_list);
-        else
                 list_add(&new_me->me_list, &current_me->me_list);
+        else
+                list_add_tail(&new_me->me_list, &current_me->me_list);
 
         lnet_me2handle(handle, new_me);
 
index ccad196..26fccb4 100644 (file)
@@ -48,7 +48,6 @@ CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444,
 
 /* forward ref */
 static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg);
-static void lnet_drop_delayed_put(lnet_msg_t *msg, char *reason);
 
 #define LNET_MATCHMD_NONE     0   /* Didn't match */
 #define LNET_MATCHMD_OK       1   /* Matched OK */
@@ -1711,17 +1710,17 @@ lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get)
 
         LNET_UNLOCK();
 
+        msg->msg_ev.type = LNET_EVENT_GET;
+        msg->msg_ev.target.pid = hdr->dest_pid;
+        msg->msg_ev.target.nid = hdr->dest_nid;
+        msg->msg_ev.hdr_data = 0;
+
         reply_wmd = hdr->msg.get.return_wmd;
 
         lnet_prep_send(msg, LNET_MSG_REPLY, src, offset, mlength);
 
         msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
 
-        msg->msg_ev.type = LNET_EVENT_GET;
-        msg->msg_ev.target.pid = hdr->dest_pid;
-        msg->msg_ev.target.nid = hdr->dest_nid;
-        msg->msg_ev.hdr_data = 0;
-
         if (rdma_get) {
                 /* The LND completes the REPLY from her recv procedure */
                 lnet_ni_recv(ni, msg->msg_private, msg, 0,
@@ -1759,13 +1758,16 @@ lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg)
 
         /* NB handles only looked up by creator (no flips) */
         md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
-        if (md == NULL || md->md_threshold == 0) {
+        if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
                 CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s for %s "
                        "MD "LPX64"."LPX64"\n", 
                        libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
                        (md == NULL) ? "invalid" : "inactive",
                        hdr->msg.reply.dst_wmd.wh_interface_cookie,
                        hdr->msg.reply.dst_wmd.wh_object_cookie);
+                if (md != NULL && md->md_me != NULL)
+                        CERROR("REPLY MD also attached to portal %d\n",
+                               md->md_me->me_portal);
 
                 LNET_UNLOCK();
                 return ENOENT;                  /* +ve: OK but no match */
@@ -1832,7 +1834,7 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
 
         /* NB handles only looked up by creator (no flips) */
         md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
-        if (md == NULL || md->md_threshold == 0) {
+        if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
                 /* Don't moan; this is expected */
                 CDEBUG(D_NET,
                        "%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n",
@@ -1840,6 +1842,10 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
                        (md == NULL) ? "invalid" : "inactive",
                        hdr->msg.ack.dst_wmd.wh_interface_cookie,
                        hdr->msg.ack.dst_wmd.wh_object_cookie);
+                if (md != NULL && md->md_me != NULL)
+                        CERROR("Source MD also attached to portal %d\n",
+                               md->md_me->me_portal);
+
                 LNET_UNLOCK();
                 return ENOENT;                  /* +ve! */
         }
@@ -2206,12 +2212,17 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack,
         LNET_LOCK();
 
         md = lnet_handle2md(&mdh);
-        if (md == NULL || md->md_threshold == 0) {
+        if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
                 lnet_msg_free(msg);
-                LNET_UNLOCK();
 
-                CERROR("Dropping PUT to %s: MD invalid\n", 
-                       libcfs_id2str(target));
+                CERROR("Dropping PUT ("LPU64":%d:%s): MD (%d) invalid\n",
+                       match_bits, portal, libcfs_id2str(target),
+                       md == NULL ? -1 : md->md_threshold);
+                if (md != NULL && md->md_me != NULL)
+                        CERROR("Source MD also attached to portal %d\n",
+                               md->md_me->me_portal);
+
+                LNET_UNLOCK();
                 return -ENOENT;
         }
 
@@ -2383,12 +2394,17 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh,
         LNET_LOCK();
 
         md = lnet_handle2md(&mdh);
-        if (md == NULL || md->md_threshold == 0) {
+        if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
                 lnet_msg_free(msg);
-                LNET_UNLOCK();
 
-                CERROR("Dropping GET to %s: MD invalid\n",
-                       libcfs_id2str(target));
+                CERROR("Dropping GET ("LPU64":%d:%s): MD (%d) invalid\n",
+                       match_bits, portal, libcfs_id2str(target),
+                       md == NULL ? -1 : md->md_threshold);
+                if (md != NULL && md->md_me != NULL)
+                        CERROR("REPLY MD also attached to portal %d\n",
+                               md->md_me->me_portal);
+
+                LNET_UNLOCK();
                 return -ENOENT;
         }
 
index c02fc2f..58733f9 100644 (file)
@@ -28,7 +28,6 @@
 #if defined(__KERNEL__) && defined(LNET_ROUTER)
 
 #include <linux/seq_file.h>
-#include <linux/lustre_compat25.h>
 
 /* this is really lnet_proc.c */
 
index dbf828a..8066bde 100644 (file)
@@ -233,8 +233,8 @@ srpc_find_peer_locked (lnet_nid_t nid)
 static srpc_peer_t *
 srpc_nid2peer (lnet_nid_t nid)
 {
-       srpc_peer_t *peer;
-       srpc_peer_t *new_peer;
+        srpc_peer_t *peer;
+        srpc_peer_t *new_peer;
 
         spin_lock(&srpc_data.rpc_glock);
         peer = srpc_find_peer_locked(nid);
@@ -242,7 +242,7 @@ srpc_nid2peer (lnet_nid_t nid)
 
         if (peer != NULL)
                 return peer;
-        
+
         new_peer = srpc_create_peer(nid);
 
         spin_lock(&srpc_data.rpc_glock);
@@ -260,7 +260,7 @@ srpc_nid2peer (lnet_nid_t nid)
                 spin_unlock(&srpc_data.rpc_glock);
                 return NULL;
         }
-                
+
         list_add_tail(&new_peer->stp_list, srpc_nid2peerlist(nid));
         spin_unlock(&srpc_data.rpc_glock);
         return new_peer;
@@ -410,7 +410,7 @@ srpc_post_passive_rdma(int portal, __u64 matchbits, void *buf,
 }
 
 int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, 
+srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
                       int options, lnet_process_id_t peer, lnet_nid_t self,
                       lnet_handle_md_t *mdh, srpc_event_t *ev)
 {
@@ -473,7 +473,7 @@ srpc_post_active_rqtbuf(lnet_process_id_t peer, int service, void *buf,
         else
                 portal = SRPC_FRAMEWORK_REQUEST_PORTAL;
 
-        rc = srpc_post_active_rdma(portal, service, buf, len, 
+        rc = srpc_post_active_rdma(portal, service, buf, len,
                                    LNET_MD_OP_PUT, peer,
                                    LNET_NID_ANY, mdh, ev);
         return rc;
@@ -541,7 +541,7 @@ srpc_service_post_buffer (srpc_service_t *sv, srpc_buffer_t *buf)
         spin_unlock(&sv->sv_lock);
         LIBCFS_FREE(buf, sizeof(*buf));
         spin_lock(&sv->sv_lock);
-        return rc; 
+        return rc;
 }
 
 int
@@ -924,8 +924,11 @@ srpc_handle_rpc (swi_workitem_t *wi)
                 msg = &rpc->srpc_reqstbuf->buf_msg;
                 reply = &rpc->srpc_replymsg.msg_body.reply;
 
-                if (msg->msg_version != SRPC_MSG_VERSION &&
-                    msg->msg_version != __swab32(SRPC_MSG_VERSION)) {
+                if (msg->msg_magic == 0) {
+                        /* moaned already in srpc_lnet_ev_handler */
+                        rc = EBADMSG;
+                } else if (msg->msg_version != SRPC_MSG_VERSION &&
+                           msg->msg_version != __swab32(SRPC_MSG_VERSION)) {
                         CWARN ("Version mismatch: %u, %u expected, from %s\n",
                                msg->msg_version, SRPC_MSG_VERSION,
                                libcfs_id2str(rpc->srpc_peer));
@@ -953,7 +956,8 @@ srpc_handle_rpc (swi_workitem_t *wi)
                 }
         }
         case SWI_STATE_BULK_STARTED:
-                LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
+                /* we cannot LASSERT ev_fired right here because it
+                 * may be set only upon an event with unlinked==1 */
 
                 if (rpc->srpc_bulk != NULL) {
                         rc = ev->ev_status;
@@ -962,11 +966,20 @@ srpc_handle_rpc (swi_workitem_t *wi)
                                 rc = (*sv->sv_bulk_ready) (rpc, rc);
 
                         if (rc != 0) {
-                                srpc_server_rpc_done(rpc, rc);
-                                return 1;
+                                if (ev->ev_fired) {
+                                        srpc_server_rpc_done(rpc, rc);
+                                        return 1;
+                                }
+
+                                rpc->srpc_status = rc;
+                                wi->wi_state     = SWI_STATE_BULK_ERRORED;
+                                LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
+                                return 0; /* wait for UNLINK event  */
                         }
                 }
 
+                LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
+
                 wi->wi_state = SWI_STATE_REPLY_SUBMITTED;
                 rc = srpc_send_reply(rpc);
                 if (rc == 0)
@@ -980,6 +993,13 @@ srpc_handle_rpc (swi_workitem_t *wi)
                 wi->wi_state = SWI_STATE_DONE;
                 srpc_server_rpc_done(rpc, ev->ev_status);
                 return 1;
+
+        case SWI_STATE_BULK_ERRORED:
+                LASSERT (rpc->srpc_bulk != NULL && ev->ev_fired);
+                LASSERT (rpc->srpc_status != 0);
+
+                srpc_server_rpc_done(rpc, rpc->srpc_status);
+                return 1;
         }
 
         return 0;
@@ -1017,20 +1037,20 @@ srpc_add_client_rpc_timer (srpc_client_rpc_t *rpc)
         CFS_INIT_LIST_HEAD(&timer->stt_list);
         timer->stt_data    = rpc;
         timer->stt_func    = srpc_client_rpc_expired;
-        timer->stt_expires = cfs_time_add(rpc->crpc_timeout, 
+        timer->stt_expires = cfs_time_add(rpc->crpc_timeout,
                                           cfs_time_current_sec());
         stt_add_timer(timer);
         return;
 }
 
-/* 
+/*
  * Called with rpc->crpc_lock held.
  *
  * Upon exit the RPC expiry timer is not queued and the handler is not
  * running on any CPU. */
 void
 srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc)
-{     
+{
         /* timer not planted or already exploded */
         if (rpc->crpc_timeout == 0) return;
 
@@ -1042,7 +1062,7 @@ srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc)
         while (rpc->crpc_timeout != 0) {
                 spin_unlock(&rpc->crpc_lock);
 
-                cfs_schedule(); 
+                cfs_schedule();
 
                 spin_lock(&rpc->crpc_lock);
         }
@@ -1110,7 +1130,7 @@ srpc_client_rpc_done (srpc_client_rpc_t *rpc, int status)
          * No one can schedule me now since:
          * - RPC timer has been defused.
          * - all LNet events have been fired.
-         * - crpc_closed has been set, preventing srpc_abort_rpc from 
+         * - crpc_closed has been set, preventing srpc_abort_rpc from
          *   scheduling me.
          * Cancel pending schedules and prevent future schedule attempts:
          */
@@ -1168,7 +1188,7 @@ srpc_send_rpc (swi_workitem_t *wi)
 
         case SWI_STATE_REQUEST_SUBMITTED:
                 /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
-                 * order; however, they're processed in a strict order: 
+                 * order; however, they're processed in a strict order:
                  * rqt, rpy, and bulk. */
                 if (!rpc->crpc_reqstev.ev_fired) break;
 
@@ -1185,7 +1205,7 @@ srpc_send_rpc (swi_workitem_t *wi)
                 rc = rpc->crpc_replyev.ev_status;
                 if (rc != 0) break;
 
-                if ((reply->msg_type != type && 
+                if ((reply->msg_type != type &&
                      reply->msg_type != __swab32(type)) ||
                     (reply->msg_magic != SRPC_MSG_MAGIC &&
                      reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
@@ -1254,7 +1274,7 @@ srpc_create_client_rpc (lnet_process_id_t peer, int service,
 {
         srpc_client_rpc_t *rpc;
 
-       LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
+        LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
                                    crpc_bulk.bk_iovs[nbulkiov]));
         if (rpc == NULL)
                 return NULL;
@@ -1403,7 +1423,7 @@ srpc_send_reply (srpc_server_rpc_t *rpc)
 }
 
 /* when in kernel always called with LNET_LOCK() held, and in thread context */
-void 
+void
 srpc_lnet_ev_handler (lnet_event_t *ev)
 {
         srpc_event_t      *rpcev = ev->md.user_ptr;
@@ -1413,6 +1433,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
         srpc_service_t    *sv;
         srpc_msg_t        *msg;
         srpc_msg_type_t    type;
+        int                fired_flag = 1;
 
         LASSERT (!in_interrupt());
 
@@ -1445,7 +1466,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
 
                 LASSERT (rpcev->ev_fired == 0);
                 rpcev->ev_fired  = 1;
-                rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
+                rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
                                                 -EINTR : ev->status;
                 swi_schedule_workitem(&crpc->crpc_wi);
 
@@ -1473,7 +1494,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
                 LASSERT (sv->sv_nposted_msg >= 0);
 
                 if (sv->sv_shuttingdown) {
-                        /* Leave buffer on sv->sv_posted_msgq since 
+                        /* Leave buffer on sv->sv_posted_msgq since
                          * srpc_finish_service needs to traverse it. */
                         spin_unlock(&sv->sv_lock);
                         break;
@@ -1484,7 +1505,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
                 type = srpc_service2request(sv->sv_id);
 
                 if (ev->status != 0 || ev->mlength != sizeof(*msg) ||
-                    (msg->msg_type != type && 
+                    (msg->msg_type != type &&
                      msg->msg_type != __swab32(type)) ||
                     (msg->msg_magic != SRPC_MSG_MAGIC &&
                      msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
@@ -1494,20 +1515,10 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
                                 ev->status, ev->mlength,
                                 msg->msg_type, msg->msg_magic);
 
-                        /* NB might drop sv_lock in srpc_service_recycle_buffer,
-                         * sv_nposted_msg++ as an implicit reference to prevent
-                         * sv from disappearing under me */
-                        sv->sv_nposted_msg++;
-                        srpc_service_recycle_buffer(sv, buffer);
-                        sv->sv_nposted_msg--;
-                        spin_unlock(&sv->sv_lock);
-
-                        if (ev->status == 0) { /* status!=0 counted already */
-                                spin_lock(&srpc_data.rpc_glock);
-                                srpc_data.rpc_counters.errors++;
-                                spin_unlock(&srpc_data.rpc_glock);
-                        }
-                        break;
+                        /* NB can't call srpc_service_recycle_buffer here since
+                         * it may call LNetM[DE]Attach. The invalid magic tells
+                         * srpc_handle_rpc to drop this RPC */
+                        msg->msg_magic = 0;
                 }
 
                 if (!list_empty(&sv->sv_free_rpcq)) {
@@ -1534,10 +1545,13 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
                          ev->type == LNET_EVENT_REPLY ||
                          ev->type == LNET_EVENT_UNLINK);
 
-                if (ev->type == LNET_EVENT_SEND && 
-                    ev->status == 0 && !ev->unlinked)
-                        break; /* wait for the final LNET_EVENT_REPLY */
-
+                if (ev->type == LNET_EVENT_SEND && !ev->unlinked) {
+                        if (ev->status == 0)
+                                break; /* wait for the final LNET_EVENT_REPLY */
+                        else
+                                fired_flag = 0; /* LNET_EVENT_REPLY may arrive
+                                                   (optimized GET case) */
+                }
         case SRPC_BULK_PUT_SENT:
                 if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
                         spin_lock(&srpc_data.rpc_glock);
@@ -1556,9 +1570,12 @@ srpc_lnet_ev_handler (lnet_event_t *ev)
                 LASSERT (rpcev == &srpc->srpc_ev);
 
                 spin_lock(&sv->sv_lock);
-                rpcev->ev_fired  = 1;
-                rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
+                if (fired_flag)
+                        rpcev->ev_fired  = 1;
+
+                rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
                                                 -EINTR : ev->status;
+
                 srpc_schedule_server_rpc(srpc);
                 spin_unlock(&sv->sv_lock);
                 break;
index 8dff8d8..cdfa933 100644 (file)
@@ -90,6 +90,7 @@ typedef struct { volatile int counter; } atomic_t;
 #define SWI_STATE_REQUEST_SENT             4
 #define SWI_STATE_REPLY_RECEIVED           5
 #define SWI_STATE_BULK_STARTED             6
+#define SWI_STATE_BULK_ERRORED             7
 #define SWI_STATE_DONE                     10
 
 /* forward refs */
@@ -105,11 +106,11 @@ struct sfw_test_instance;
  *   serialized with respect to itself.
  * - no CPU affinity, a workitem does not necessarily run on the same CPU
  *   that schedules it. However, this might change in the future.
- * - if a workitem is scheduled again before it has a chance to run, it 
+ * - if a workitem is scheduled again before it has a chance to run, it
  *   runs only once.
- * - if a workitem is scheduled while it runs, it runs again after it 
- *   completes; this ensures that events occurring while other events are 
- *   being processed receive due attention. This behavior also allows a 
+ * - if a workitem is scheduled while it runs, it runs again after it
+ *   completes; this ensures that events occurring while other events are
+ *   being processed receive due attention. This behavior also allows a
  *   workitem to reschedule itself.
  *
  * Usage notes:
@@ -389,7 +390,7 @@ typedef struct {
 typedef struct {
         int  (*tso_init)(struct sfw_test_instance *tsi); /* initialize test client */
         void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
-        int  (*tso_prep_rpc)(struct sfw_test_unit *tsu,     
+        int  (*tso_prep_rpc)(struct sfw_test_unit *tsu,
                              lnet_process_id_t dest,
                              srpc_client_rpc_t **rpc);   /* prep a tests rpc */
         void (*tso_done_rpc)(struct sfw_test_unit *tsu,
@@ -422,7 +423,7 @@ typedef struct sfw_test_instance {
         } tsi_u;
 } sfw_test_instance_t;
 
-/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at 
+/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at
  * the end of pages are not used */
 #define SFW_MAX_CONCUR     LST_MAX_CONCUR
 #define SFW_ID_PER_PAGE    (CFS_PAGE_SIZE / sizeof(lnet_process_id_t))
@@ -459,7 +460,7 @@ void sfw_add_bulk_page(srpc_bulk_t *bk, cfs_page_t *pg, int i);
 int sfw_alloc_pages(srpc_server_rpc_t *rpc, int npages, int sink);
 
 srpc_client_rpc_t *
-srpc_create_client_rpc(lnet_process_id_t peer, int service, 
+srpc_create_client_rpc(lnet_process_id_t peer, int service,
                        int nbulkiov, int bulklen,
                        void (*rpc_done)(srpc_client_rpc_t *),
                        void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
@@ -547,12 +548,12 @@ srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer,
         return;
 }
 
-static inline const char * 
+static inline const char *
 swi_state2str (int state)
 {
 #define STATE2STR(x) case x: return #x
         switch(state) {
-                default: 
+                default:
                         LBUG();
                 STATE2STR(SWI_STATE_NEWBORN);
                 STATE2STR(SWI_STATE_REPLY_SUBMITTED);
@@ -561,6 +562,7 @@ swi_state2str (int state)
                 STATE2STR(SWI_STATE_REQUEST_SENT);
                 STATE2STR(SWI_STATE_REPLY_RECEIVED);
                 STATE2STR(SWI_STATE_BULK_STARTED);
+                STATE2STR(SWI_STATE_BULK_ERRORED);
                 STATE2STR(SWI_STATE_DONE);
         }
 #undef STATE2STR
index 7000a5e..3b504e7 100644 (file)
@@ -37,7 +37,6 @@
  *
  * Author: Eric Barton <eeb@bartonsoftware.com>
  */
- */
 
 #include "ptllnd.h"
 
@@ -45,13 +44,13 @@ lnd_t               the_ptllnd = {
         .lnd_type       = PTLLND,
         .lnd_startup    = ptllnd_startup,
         .lnd_shutdown   = ptllnd_shutdown,
-       .lnd_ctl        = ptllnd_ctl,
+        .lnd_ctl        = ptllnd_ctl,
         .lnd_send       = ptllnd_send,
         .lnd_recv       = ptllnd_recv,
         .lnd_eager_recv = ptllnd_eager_recv,
         .lnd_notify     = ptllnd_notify,
         .lnd_wait       = ptllnd_wait,
-       .lnd_setasync   = ptllnd_setasync,
+        .lnd_setasync   = ptllnd_setasync,
 };
 
 static int ptllnd_ni_count = 0;
@@ -62,112 +61,112 @@ static struct list_head ptllnd_history_list;
 void
 ptllnd_history_fini(void)
 {
-       ptllnd_he_t *he;
-
-       while (!list_empty(&ptllnd_idle_history)) {
-               he = list_entry(ptllnd_idle_history.next,
-                               ptllnd_he_t, he_list);
-               
-               list_del(&he->he_list);
-               LIBCFS_FREE(he, sizeof(*he));
-       }
-       
-       while (!list_empty(&ptllnd_history_list)) {
-               he = list_entry(ptllnd_history_list.next,
-                               ptllnd_he_t, he_list);
-               
-               list_del(&he->he_list);
-               LIBCFS_FREE(he, sizeof(*he));
-       }
+        ptllnd_he_t *he;
+
+        while (!list_empty(&ptllnd_idle_history)) {
+                he = list_entry(ptllnd_idle_history.next,
+                                ptllnd_he_t, he_list);
+
+                list_del(&he->he_list);
+                LIBCFS_FREE(he, sizeof(*he));
+        }
+
+        while (!list_empty(&ptllnd_history_list)) {
+                he = list_entry(ptllnd_history_list.next,
+                                ptllnd_he_t, he_list);
+
+                list_del(&he->he_list);
+                LIBCFS_FREE(he, sizeof(*he));
+        }
 }
 
 int
 ptllnd_history_init(void)
 {
-       int          i;
-       ptllnd_he_t *he;
-       int          n;
-       int          rc;
-       
-       CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
-       CFS_INIT_LIST_HEAD(&ptllnd_history_list);
-       
-       rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
-       if (rc != 0)
-               return rc;
-       
-       for (i = 0; i < n; i++) {
-               LIBCFS_ALLOC(he, sizeof(*he));
-               if (he == NULL) {
-                       ptllnd_history_fini();
-                       return -ENOMEM;
-               }
-               
-               list_add(&he->he_list, &ptllnd_idle_history);
-       }
-
-       PTLLND_HISTORY("Init");
-
-       return 0;
+        int          i;
+        ptllnd_he_t *he;
+        int          n;
+        int          rc;
+
+        CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
+        CFS_INIT_LIST_HEAD(&ptllnd_history_list);
+
+        rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
+        if (rc != 0)
+                return rc;
+
+        for (i = 0; i < n; i++) {
+                LIBCFS_ALLOC(he, sizeof(*he));
+                if (he == NULL) {
+                        ptllnd_history_fini();
+                        return -ENOMEM;
+                }
+
+                list_add(&he->he_list, &ptllnd_idle_history);
+        }
+
+        PTLLND_HISTORY("Init");
+
+        return 0;
 }
 
 void
 ptllnd_history(const char *fn, const char *file, const int line,
-              const char *fmt, ...)
+               const char *fmt, ...)
 {
-       static int     seq;
-       
+        static int     seq;
+
         va_list        ap;
-       ptllnd_he_t   *he;
-       
-       if (!list_empty(&ptllnd_idle_history)) {
-               he = list_entry(ptllnd_idle_history.next,
-                               ptllnd_he_t, he_list);
-       } else if (!list_empty(&ptllnd_history_list)) {
-               he = list_entry(ptllnd_history_list.next,
-                               ptllnd_he_t, he_list);
-       } else {
-               return;
-       }
-
-       list_del(&he->he_list);
-       list_add_tail(&he->he_list, &ptllnd_history_list);
-
-       he->he_seq = seq++;
-       he->he_fn = fn;
-       he->he_file = file;
-       he->he_line = line;
-       gettimeofday(&he->he_time, NULL);
-       
-       va_start(ap, fmt);
-       vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
-       va_end(ap);
+        ptllnd_he_t   *he;
+
+        if (!list_empty(&ptllnd_idle_history)) {
+                he = list_entry(ptllnd_idle_history.next,
+                                ptllnd_he_t, he_list);
+        } else if (!list_empty(&ptllnd_history_list)) {
+                he = list_entry(ptllnd_history_list.next,
+                                ptllnd_he_t, he_list);
+        } else {
+                return;
+        }
+
+        list_del(&he->he_list);
+        list_add_tail(&he->he_list, &ptllnd_history_list);
+
+        he->he_seq = seq++;
+        he->he_fn = fn;
+        he->he_file = file;
+        he->he_line = line;
+        gettimeofday(&he->he_time, NULL);
+
+        va_start(ap, fmt);
+        vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
+        va_end(ap);
 }
 
 void
 ptllnd_dump_history(void)
 {
-       ptllnd_he_t    *he;
+        ptllnd_he_t    *he;
+
+        PTLLND_HISTORY("dumping...");
 
-       PTLLND_HISTORY("dumping...");
-       
-       while (!list_empty(&ptllnd_history_list)) {
-               he = list_entry(ptllnd_history_list.next,
-                               ptllnd_he_t, he_list);
+        while (!list_empty(&ptllnd_history_list)) {
+                he = list_entry(ptllnd_history_list.next,
+                                ptllnd_he_t, he_list);
 
-               list_del(&he->he_list);
-               
-               CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
-                      (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
-                      he->he_file, he->he_line, he->he_fn, he->he_msg);
+                list_del(&he->he_list);
 
-               list_add_tail(&he->he_list, &ptllnd_idle_history);
-       }
+                CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
+                       (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
+                       he->he_file, he->he_line, he->he_fn, he->he_msg);
 
-       PTLLND_HISTORY("complete");
+                list_add_tail(&he->he_list, &ptllnd_idle_history);
+        }
+
+        PTLLND_HISTORY("complete");
 }
 
-void 
+void
 ptllnd_assert_wire_constants (void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
@@ -273,10 +272,10 @@ ptllnd_get_tunables(lnet_ni_t *ni)
         int          rc;
         int          temp;
 
-       /*  Other tunable defaults depend on this */
-       rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0);
-       if (rc != 0)
-               return rc;
+        /*  Other tunable defaults depend on this */
+        rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0);
+        if (rc != 0)
+                return rc;
 
         rc = ptllnd_parse_int_tunable(&plni->plni_portal,
                                       "PTLLND_PORTAL", PTLLND_PORTAL);
@@ -293,6 +292,11 @@ ptllnd_get_tunables(lnet_ni_t *ni)
                                       "PTLLND_PEERCREDITS", PTLLND_PEERCREDITS);
         if (rc != 0)
                 return rc;
+        /* kptl_msg_t::ptlm_credits is only a __u8 */
+        if (plni->plni_peer_credits > 255) {
+                CERROR("PTLLND_PEERCREDITS must be <= 255\n");
+                return -EINVAL;
+        }
 
         rc = ptllnd_parse_int_tunable(&max_msg_size,
                                       "PTLLND_MAX_MSG_SIZE",
@@ -321,56 +325,56 @@ ptllnd_get_tunables(lnet_ni_t *ni)
         if (rc != 0)
                 return rc;
 
-       rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
-                                     "PTLLND_CHECKSUM", 0);
-       if (rc != 0)
-               return rc;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
-                                     "PTLLND_TX_HISTORY",
-                                     plni->plni_debug ? 1024 : 0);
-       if (rc != 0)
-               return rc;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch,
-                                     "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1);
-       if (rc != 0)
-               return rc;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
-                                     "PTLLND_ABORT_ON_NAK", 0);
-       if (rc != 0)
-               return rc;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
-                                     "PTLLND_DUMP_ON_NAK", plni->plni_debug);
-       if (rc != 0)
-               return rc;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval,
-                                     "PTLLND_WATCHDOG_INTERVAL", 1);
-       if (rc != 0)
-               return rc;
-       if (plni->plni_watchdog_interval <= 0)
-               plni->plni_watchdog_interval = 1;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_timeout,
-                                     "PTLLND_TIMEOUT", 50);
-       if (rc != 0)
-               return rc;
-
-       rc = ptllnd_parse_int_tunable(&plni->plni_long_wait,
-                                     "PTLLND_LONG_WAIT",
-                                     plni->plni_debug ? 5 : plni->plni_timeout);
-       if (rc != 0)
-               return rc;
-       plni->plni_long_wait *= 1000;           /* convert to mS */
+        rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
+                                      "PTLLND_CHECKSUM", 0);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
+                                      "PTLLND_TX_HISTORY",
+                                      plni->plni_debug ? 1024 : 0);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch,
+                                      "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
+                                      "PTLLND_ABORT_ON_NAK", 0);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
+                                      "PTLLND_DUMP_ON_NAK", plni->plni_debug);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval,
+                                      "PTLLND_WATCHDOG_INTERVAL", 1);
+        if (rc != 0)
+                return rc;
+        if (plni->plni_watchdog_interval <= 0)
+                plni->plni_watchdog_interval = 1;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_timeout,
+                                      "PTLLND_TIMEOUT", 50);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_long_wait,
+                                      "PTLLND_LONG_WAIT",
+                                      plni->plni_debug ? 5 : plni->plni_timeout);
+        if (rc != 0)
+                return rc;
+        plni->plni_long_wait *= 1000;           /* convert to mS */
 
         plni->plni_max_msg_size = max_msg_size & ~7;
         if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
                 plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
-       CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
-       CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
+        CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
+        CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
 
         plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer;
 
@@ -442,9 +446,9 @@ ptllnd_size_buffers (lnet_ni_t *ni, int delta)
         CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
         CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
 
-       plni->plni_nmsgs += delta;
-       LASSERT(plni->plni_nmsgs >= 0);
-       
+        plni->plni_nmsgs += delta;
+        LASSERT(plni->plni_nmsgs >= 0);
+
         nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare;
 
         nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) /
@@ -491,22 +495,22 @@ ptllnd_destroy_buffers (lnet_ni_t *ni)
 
                 LASSERT (plni->plni_nbuffers > 0);
                 if (buf->plb_posted) {
-                       time_t   start = cfs_time_current_sec();
-                       int      w = plni->plni_long_wait;
+                        time_t   start = cfs_time_current_sec();
+                        int      w = plni->plni_long_wait;
 
                         LASSERT (plni->plni_nposted_buffers > 0);
 
 #ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
                         (void) PtlMDUnlink(buf->plb_md);
 
-                       while (buf->plb_posted) {
-                               if (w > 0 && cfs_time_current_sec() > start + w/1000) {
-                                       CWARN("Waited %ds to unlink buffer\n",
-                                             (int)(cfs_time_current_sec() - start));
-                                       w *= 2;
-                               }
-                               ptllnd_wait(ni, w);
-                       }
+                        while (buf->plb_posted) {
+                                if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+                                        CWARN("Waited %ds to unlink buffer\n",
+                                              (int)(cfs_time_current_sec() - start));
+                                        w *= 2;
+                                }
+                                ptllnd_wait(ni, w);
+                        }
 #else
                         while (buf->plb_posted) {
                                 rc = PtlMDUnlink(buf->plb_md);
@@ -516,12 +520,12 @@ ptllnd_destroy_buffers (lnet_ni_t *ni)
                                         break;
                                 }
                                 LASSERT (rc == PTL_MD_IN_USE);
-                               if (w > 0 && cfs_time_current_sec() > start + w/1000) {
-                                       CWARN("Waited %ds to unlink buffer\n",
-                                             cfs_time_current_sec() - start);
-                                       w *= 2;
-                               }
-                               ptllnd_wait(ni, w);
+                                if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+                                        CWARN("Waited %ds to unlink buffer\n",
+                                              cfs_time_current_sec() - start);
+                                        w *= 2;
+                                }
+                                ptllnd_wait(ni, w);
                         }
 #endif
                 }
@@ -591,14 +595,14 @@ ptllnd_close_peers (lnet_ni_t *ni)
 int
 ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 {
-       switch (cmd) {
-       case IOC_LIBCFS_DEBUG_PEER:
-               ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg));
-               return 0;
-               
-       default:
-               return -EINVAL;
-       }
+        switch (cmd) {
+        case IOC_LIBCFS_DEBUG_PEER:
+                ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg));
+                return 0;
+
+        default:
+                return -EINVAL;
+        }
 }
 
 __u64
@@ -616,25 +620,25 @@ ptllnd_shutdown (lnet_ni_t *ni)
 {
         ptllnd_ni_t *plni = ni->ni_data;
         int          rc;
-       time_t       start = cfs_time_current_sec();
-       int          w = plni->plni_long_wait;
+        time_t       start = cfs_time_current_sec();
+        int          w = plni->plni_long_wait;
 
         LASSERT (ptllnd_ni_count == 1);
-       plni->plni_max_tx_history = 0;
+        plni->plni_max_tx_history = 0;
 
-       ptllnd_cull_tx_history(plni);
+        ptllnd_cull_tx_history(plni);
 
         ptllnd_close_peers(ni);
         ptllnd_destroy_buffers(ni);
 
         while (plni->plni_npeers > 0) {
-               if (w > 0 && cfs_time_current_sec() > start + w/1000) {
-                       CWARN("Waited %ds for peers to shutdown\n",
-                             (int)(cfs_time_current_sec() - start));
-                       w *= 2;
-               }
+                if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+                        CWARN("Waited %ds for peers to shutdown\n",
+                              (int)(cfs_time_current_sec() - start));
+                        w *= 2;
+                }
                 ptllnd_wait(ni, w);
-       }
+        }
 
         LASSERT (plni->plni_ntxs == 0);
         LASSERT (plni->plni_nrxs == 0);
@@ -656,9 +660,9 @@ ptllnd_startup (lnet_ni_t *ni)
         ptllnd_ni_t *plni;
         int          rc;
 
-       /* could get limits from portals I guess... */
-       ni->ni_maxtxcredits =
-       ni->ni_peertxcredits = 1000;
+        /* could get limits from portals I guess... */
+        ni->ni_maxtxcredits =
+        ni->ni_peertxcredits = 1000;
 
         if (ptllnd_ni_count != 0) {
                 CERROR("Can't have > 1 instance of ptllnd\n");
@@ -667,12 +671,12 @@ ptllnd_startup (lnet_ni_t *ni)
 
         ptllnd_ni_count++;
 
-       rc = ptllnd_history_init();
-       if (rc != 0) {
-               CERROR("Can't init history\n");
-               goto failed0;
-       }
-       
+        rc = ptllnd_history_init();
+        if (rc != 0) {
+                CERROR("Can't init history\n");
+                goto failed0;
+        }
+
         LIBCFS_ALLOC(plni, sizeof(*plni));
         if (plni == NULL) {
                 CERROR("Can't allocate ptllnd state\n");
@@ -685,9 +689,9 @@ ptllnd_startup (lnet_ni_t *ni)
         plni->plni_stamp = ptllnd_get_timestamp();
         plni->plni_nrxs = 0;
         plni->plni_ntxs = 0;
-       plni->plni_ntx_history = 0;
-       plni->plni_watchdog_peeridx = 0;
-       plni->plni_watchdog_nextt = cfs_time_current_sec();
+        plni->plni_ntx_history = 0;
+        plni->plni_watchdog_peeridx = 0;
+        plni->plni_watchdog_nextt = cfs_time_current_sec();
         CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs);
         CFS_INIT_LIST_HEAD(&plni->plni_tx_history);
 
@@ -714,7 +718,7 @@ ptllnd_startup (lnet_ni_t *ni)
                        NULL, NULL, &plni->plni_nih);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                 CERROR("PtlNIInit failed: %s(%d)\n",
-                      ptllnd_errtype2str(rc), rc);
+                       ptllnd_errtype2str(rc), rc);
                 rc = -ENODEV;
                 goto failed2;
         }
@@ -723,7 +727,7 @@ ptllnd_startup (lnet_ni_t *ni)
                         PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
         if (rc != PTL_OK) {
                 CERROR("PtlEQAlloc failed: %s(%d)\n",
-                      ptllnd_errtype2str(rc), rc);
+                       ptllnd_errtype2str(rc), rc);
                 rc = -ENODEV;
                 goto failed3;
         }
@@ -731,10 +735,10 @@ ptllnd_startup (lnet_ni_t *ni)
         /*
          * Fetch the Portals NID
          */
-       rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
+        rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
         if (rc != PTL_OK) {
                 CERROR ("PtlGetID failed : %s(%d)\n",
-                       ptllnd_errtype2str(rc), rc);
+                        ptllnd_errtype2str(rc), rc);
                 rc = -EINVAL;
                 goto failed4;
         }
@@ -754,7 +758,7 @@ ptllnd_startup (lnet_ni_t *ni)
         if (rc != 0)
                 goto failed4;
 
-       return 0;
+        return 0;
 
  failed4:
         ptllnd_destroy_buffers(ni);
@@ -766,7 +770,7 @@ ptllnd_startup (lnet_ni_t *ni)
  failed1:
         LIBCFS_FREE(plni, sizeof(*plni));
  failed0:
-       ptllnd_history_fini();
+        ptllnd_history_fini();
         ptllnd_ni_count--;
         CDEBUG(D_NET, "<<< rc=%d\n",rc);
         return rc;
index 87697e7..b8198b2 100644 (file)
@@ -140,11 +140,13 @@ typedef struct
 
         int                        plp_max_msg_size;
         int                        plp_refcount;
+        int                        plp_sent_hello:1;
         int                        plp_recvd_hello:1;
         int                        plp_closing:1;
         __u64                      plp_match;
         __u64                      plp_stamp;
         struct list_head           plp_txq;
+        struct list_head           plp_noopq;
         struct list_head           plp_activeq;
 } ptllnd_peer_t;
 
@@ -271,13 +273,13 @@ ptllnd_peer_decref (ptllnd_peer_t *peer)
 static inline lnet_nid_t
 ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid)
 {
-       return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
+        return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
 }
 
 static inline ptl_nid_t
 ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
 {
-       return LNET_NIDADDR(lnet_nid);
+        return LNET_NIDADDR(lnet_nid);
 }
 
 /*
index da6d277..c8431df 100644 (file)
@@ -55,6 +55,8 @@ ptllnd_post_tx(ptllnd_tx_t *tx)
 {
         ptllnd_peer_t  *peer = tx->tx_peer;
 
+        LASSERT (tx->tx_type != PTLLND_MSG_TYPE_NOOP);
+
         ptllnd_set_tx_deadline(tx);
         list_add_tail(&tx->tx_list, &peer->plp_txq);
         ptllnd_check_sends(peer);
@@ -67,7 +69,7 @@ ptllnd_ptlid2str(ptl_process_id_t id)
         static int  idx = 0;
 
         char   *str = strs[idx++];
-        
+
         if (idx >= sizeof(strs)/sizeof(strs[0]))
                 idx = 0;
 
@@ -88,6 +90,7 @@ ptllnd_destroy_peer(ptllnd_peer_t *peer)
         LASSERT (peer->plp_closing);
         LASSERT (plni->plni_npeers > 0);
         LASSERT (list_empty(&peer->plp_txq));
+        LASSERT (list_empty(&peer->plp_noopq));
         LASSERT (list_empty(&peer->plp_activeq));
         plni->plni_npeers--;
         LIBCFS_FREE(peer, sizeof(*peer));
@@ -117,14 +120,16 @@ ptllnd_close_peer(ptllnd_peer_t *peer, int error)
         peer->plp_closing = 1;
 
         if (!list_empty(&peer->plp_txq) ||
+            !list_empty(&peer->plp_noopq) ||
             !list_empty(&peer->plp_activeq) ||
             error != 0) {
                 CWARN("Closing %s\n", libcfs_id2str(peer->plp_id));
                 if (plni->plni_debug)
                         ptllnd_dump_debug(ni, peer->plp_id);
         }
-        
+
         ptllnd_abort_txs(plni, &peer->plp_txq);
+        ptllnd_abort_txs(plni, &peer->plp_noopq);
         ptllnd_abort_txs(plni, &peer->plp_activeq);
 
         list_del(&peer->plp_list);
@@ -136,16 +141,13 @@ ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create)
 {
         ptllnd_ni_t       *plni = ni->ni_data;
         unsigned int       hash = LNET_NIDADDR(id.nid) % plni->plni_peer_hash_size;
-        struct list_head  *tmp;
         ptllnd_peer_t     *plp;
         ptllnd_tx_t       *tx;
         int                rc;
 
         LASSERT (LNET_NIDNET(id.nid) == LNET_NIDNET(ni->ni_nid));
 
-        list_for_each(tmp, &plni->plni_peer_hash[hash]) {
-                plp = list_entry(tmp, ptllnd_peer_t, plp_list);
-
+        list_for_each_entry (plp, &plni->plni_peer_hash[hash], plp_list) {
                 if (plp->plp_id.nid == id.nid &&
                     plp->plp_id.pid == id.pid) {
                         ptllnd_peer_addref(plp);
@@ -184,11 +186,13 @@ ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create)
         plp->plp_extra_lazy_credits = 0;
         plp->plp_match = 0;
         plp->plp_stamp = 0;
+        plp->plp_sent_hello = 0;
         plp->plp_recvd_hello = 0;
         plp->plp_closing = 0;
         plp->plp_refcount = 1;
         CFS_INIT_LIST_HEAD(&plp->plp_list);
         CFS_INIT_LIST_HEAD(&plp->plp_txq);
+        CFS_INIT_LIST_HEAD(&plp->plp_noopq);
         CFS_INIT_LIST_HEAD(&plp->plp_activeq);
 
         ptllnd_peer_addref(plp);
@@ -221,27 +225,27 @@ ptllnd_count_q(struct list_head *q)
 {
         struct list_head *e;
         int               n = 0;
-        
+
         list_for_each(e, q) {
                 n++;
         }
-        
+
         return n;
 }
 
 const char *
-ptllnd_tx_typestr(int type) 
+ptllnd_tx_typestr(int type)
 {
         switch (type) {
         case PTLLND_RDMA_WRITE:
                 return "rdma_write";
-                
+
         case PTLLND_RDMA_READ:
                 return "rdma_read";
 
         case PTLLND_MSG_TYPE_PUT:
                 return "put_req";
-                
+
         case PTLLND_MSG_TYPE_GET:
                 return "get_req";
 
@@ -260,13 +264,13 @@ ptllnd_tx_typestr(int type)
 }
 
 void
-ptllnd_debug_tx(ptllnd_tx_t *tx) 
+ptllnd_debug_tx(ptllnd_tx_t *tx)
 {
         CDEBUG(D_WARNING, "%s %s b %ld.%06ld/%ld.%06ld"
                " r %ld.%06ld/%ld.%06ld status %d\n",
                ptllnd_tx_typestr(tx->tx_type),
                libcfs_id2str(tx->tx_peer->plp_id),
-               tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec, 
+               tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec,
                tx->tx_bulk_done.tv_sec, tx->tx_bulk_done.tv_usec,
                tx->tx_req_posted.tv_sec, tx->tx_req_posted.tv_usec,
                tx->tx_req_done.tv_sec, tx->tx_req_done.tv_usec,
@@ -277,59 +281,56 @@ void
 ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id)
 {
         ptllnd_peer_t    *plp = ptllnd_find_peer(ni, id, 0);
-        struct list_head *tmp;
         ptllnd_ni_t      *plni = ni->ni_data;
         ptllnd_tx_t      *tx;
-        
+
         if (plp == NULL) {
                 CDEBUG(D_WARNING, "No peer %s\n", libcfs_id2str(id));
                 return;
         }
-        
-        CDEBUG(D_WARNING, "%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d c %d/%d+%d(%d)\n",
-               libcfs_id2str(id), 
-               plp->plp_recvd_hello ? "H" : "_",
-               plp->plp_closing     ? "C" : "_",
-               plp->plp_refcount,
-               plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000),
-               plp->plp_match,
-               ptllnd_count_q(&plp->plp_txq),
-               ptllnd_count_q(&plp->plp_activeq),
-               plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits,
-               plni->plni_peer_credits + plp->plp_lazy_credits);
+
+        CWARN("%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d/%d c %d/%d+%d(%d)\n",
+              libcfs_id2str(id),
+              plp->plp_recvd_hello ? "H" : "_",
+              plp->plp_closing     ? "C" : "_",
+              plp->plp_refcount,
+              plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000),
+              plp->plp_match,
+              ptllnd_count_q(&plp->plp_txq),
+              ptllnd_count_q(&plp->plp_noopq),
+              ptllnd_count_q(&plp->plp_activeq),
+              plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits,
+              plni->plni_peer_credits + plp->plp_lazy_credits);
 
         CDEBUG(D_WARNING, "txq:\n");
-        list_for_each (tmp, &plp->plp_txq) {
-                tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-                
+        list_for_each_entry (tx, &plp->plp_txq, tx_list) {
+                ptllnd_debug_tx(tx);
+        }
+
+        CDEBUG(D_WARNING, "noopq:\n");
+        list_for_each_entry (tx, &plp->plp_noopq, tx_list) {
                 ptllnd_debug_tx(tx);
         }
 
         CDEBUG(D_WARNING, "activeq:\n");
-        list_for_each (tmp, &plp->plp_activeq) {
-                tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-                
+        list_for_each_entry (tx, &plp->plp_activeq, tx_list) {
                 ptllnd_debug_tx(tx);
         }
 
         CDEBUG(D_WARNING, "zombies:\n");
-        list_for_each (tmp, &plni->plni_zombie_txs) {
-                tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-                
+        list_for_each_entry (tx, &plni->plni_zombie_txs, tx_list) {
                 if (tx->tx_peer->plp_id.nid == id.nid &&
                     tx->tx_peer->plp_id.pid == id.pid)
                         ptllnd_debug_tx(tx);
         }
-        
+
         CDEBUG(D_WARNING, "history:\n");
-        list_for_each (tmp, &plni->plni_tx_history) {
-                tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-                
+        list_for_each_entry (tx, &plni->plni_tx_history, tx_list) {
                 if (tx->tx_peer->plp_id.nid == id.nid &&
                     tx->tx_peer->plp_id.pid == id.pid)
                         ptllnd_debug_tx(tx);
         }
-        
+
         ptllnd_peer_decref(plp);
 }
 
@@ -354,7 +355,7 @@ ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
 
         id.nid = nid;
         id.pid = LUSTRE_SRV_LNET_PID;
-        
+
         peer = ptllnd_find_peer(ni, id, 1);
         if (peer == NULL)
                 return;
@@ -367,10 +368,10 @@ ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
                               libcfs_id2str(id));
                         w *= 2;
                 }
-                
+
                 ptllnd_wait(ni, w);
         }
-        
+
         ptllnd_peer_decref(peer);
 }
 
@@ -379,7 +380,7 @@ ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync)
 {
         ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0);
         int            rc;
-        
+
         if (peer == NULL)
                 return -ENOMEM;
 
@@ -404,7 +405,7 @@ ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync)
 
         nasync -= peer->plp_extra_lazy_credits;
         peer->plp_extra_lazy_credits = 0;
-        
+
         rc = ptllnd_size_buffers(ni, nasync);
         if (rc == 0) {
                 peer->plp_lazy_credits += nasync;
@@ -597,7 +598,7 @@ ptllnd_tx_done(ptllnd_tx_t *tx)
                 }
                 ptllnd_close_peer(peer, tx->tx_status);
         }
-        
+
         ptllnd_abort_tx(tx, &tx->tx_reqmdh);
         ptllnd_abort_tx(tx, &tx->tx_bulkmdh);
 
@@ -619,7 +620,7 @@ ptllnd_tx_done(ptllnd_tx_t *tx)
 
         plni->plni_ntx_history++;
         list_add_tail(&tx->tx_list, &plni->plni_tx_history);
-        
+
         ptllnd_cull_tx_history(plni);
 }
 
@@ -663,7 +664,7 @@ ptllnd_set_txiov(ptllnd_tx_t *tx,
 
                         piov[npiov].iov_base = iov[npiov].iov_base + temp_offset;
                         piov[npiov].iov_len = iov[npiov].iov_len - temp_offset;
-                        
+
                         if (piov[npiov].iov_len >= resid) {
                                 piov[npiov].iov_len = resid;
                                 npiov++;
@@ -759,11 +760,25 @@ ptllnd_post_buffer(ptllnd_buffer_t *buf)
         return -ENOMEM;
 }
 
+static inline int
+ptllnd_peer_send_noop (ptllnd_peer_t *peer)
+{
+        ptllnd_ni_t *plni = peer->plp_ni->ni_data;
+
+        if (!peer->plp_sent_hello ||
+            peer->plp_credits == 0 ||
+            !list_empty(&peer->plp_noopq) ||
+            peer->plp_outstanding_credits < PTLLND_CREDIT_HIGHWATER(plni))
+                return 0;
+
+        /* No tx to piggyback NOOP onto or no credit to send a tx */
+        return (list_empty(&peer->plp_txq) || peer->plp_credits == 1);
+}
+
 void
 ptllnd_check_sends(ptllnd_peer_t *peer)
 {
-        lnet_ni_t      *ni = peer->plp_ni;
-        ptllnd_ni_t    *plni = ni->ni_data;
+        ptllnd_ni_t    *plni = peer->plp_ni->ni_data;
         ptllnd_tx_t    *tx;
         ptl_md_t        md;
         ptl_handle_md_t mdh;
@@ -774,10 +789,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                peer->plp_outstanding_credits, peer->plp_sent_credits,
                plni->plni_peer_credits + peer->plp_lazy_credits);
 
-        if (list_empty(&peer->plp_txq) &&
-            peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) &&
-            peer->plp_credits != 0) {
-
+        if (ptllnd_peer_send_noop(peer)) {
                 tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0);
                 CDEBUG(D_NET, "NOOP tx=%p\n",tx);
                 if (tx == NULL) {
@@ -785,12 +797,22 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                                libcfs_id2str(peer->plp_id));
                 } else {
                         ptllnd_set_tx_deadline(tx);
-                        list_add_tail(&tx->tx_list, &peer->plp_txq);
+                        list_add_tail(&tx->tx_list, &peer->plp_noopq);
                 }
         }
 
-        while (!list_empty(&peer->plp_txq)) {
-                tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list);
+        for (;;) {
+                if (!list_empty(&peer->plp_noopq)) {
+                        LASSERT (peer->plp_sent_hello);
+                        tx = list_entry(peer->plp_noopq.next,
+                                        ptllnd_tx_t, tx_list);
+                } else if (!list_empty(&peer->plp_txq)) {
+                        tx = list_entry(peer->plp_txq.next,
+                                        ptllnd_tx_t, tx_list);
+                } else {
+                        /* nothing to send right now */
+                        break;
+                }
 
                 LASSERT (tx->tx_msgsize > 0);
 
@@ -800,6 +822,14 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                          <= plni->plni_peer_credits + peer->plp_lazy_credits);
                 LASSERT (peer->plp_credits >= 0);
 
+                /* say HELLO first */
+                if (!peer->plp_sent_hello) {
+                        LASSERT (list_empty(&peer->plp_noopq));
+                        LASSERT (tx->tx_type == PTLLND_MSG_TYPE_HELLO);
+
+                        peer->plp_sent_hello = 1;
+                }
+
                 if (peer->plp_credits == 0) {   /* no credits */
                         PTLLND_HISTORY("%s[%d/%d+%d(%d)]: no creds for %p",
                                        libcfs_id2str(peer->plp_id),
@@ -810,9 +840,11 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                                        peer->plp_lazy_credits, tx);
                         break;
                 }
-                
-                if (peer->plp_credits == 1 &&   /* last credit reserved for */
-                    peer->plp_outstanding_credits == 0) { /* returning credits */
+
+                /* Last/Initial credit reserved for NOOP/HELLO */
+                if (peer->plp_credits == 1 &&
+                    tx->tx_type != PTLLND_MSG_TYPE_NOOP &&
+                    tx->tx_type != PTLLND_MSG_TYPE_HELLO) {
                         PTLLND_HISTORY("%s[%d/%d+%d(%d)]: too few creds for %p",
                                        libcfs_id2str(peer->plp_id),
                                        peer->plp_credits,
@@ -822,7 +854,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                                        peer->plp_lazy_credits, tx);
                         break;
                 }
-                
+
                 list_del(&tx->tx_list);
                 list_add_tail(&tx->tx_list, &peer->plp_activeq);
 
@@ -830,9 +862,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
                         ptllnd_msgtype2str(tx->tx_type),tx->tx_type);
 
                 if (tx->tx_type == PTLLND_MSG_TYPE_NOOP &&
-                    (!list_empty(&peer->plp_txq) ||
-                     peer->plp_outstanding_credits <
-                     PTLLND_CREDIT_HIGHWATER(plni))) {
+                    !ptllnd_peer_send_noop(peer)) {
                         /* redundant NOOP */
                         ptllnd_tx_done(tx);
                         continue;
@@ -878,7 +908,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer)
 
                 LASSERT (tx->tx_type != PTLLND_RDMA_WRITE &&
                          tx->tx_type != PTLLND_RDMA_READ);
-                
+
                 tx->tx_reqmdh = mdh;
                 gettimeofday(&tx->tx_req_posted, NULL);
 
@@ -1130,7 +1160,7 @@ ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
 
         LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */
 
-        CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n", 
+        CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n",
                lnet_msgtyp2str(msg->msg_type),
                msg->msg_niov, msg->msg_offset, msg->msg_len,
                libcfs_nid2str(msg->msg_target.nid),
@@ -1141,7 +1171,7 @@ ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
                        libcfs_id2str(msg->msg_target));
                 return -EHOSTUNREACH;
         }
-        
+
         plp = ptllnd_find_peer(ni, msg->msg_target, 1);
         if (plp == NULL)
                 return -ENOMEM;
@@ -1223,8 +1253,7 @@ void
 ptllnd_rx_done(ptllnd_rx_t *rx)
 {
         ptllnd_peer_t *plp = rx->rx_peer;
-        lnet_ni_t     *ni = plp->plp_ni;
-        ptllnd_ni_t   *plni = ni->ni_data;
+        ptllnd_ni_t   *plni = plp->plp_ni->ni_data;
 
         plp->plp_outstanding_credits++;
 
@@ -1234,7 +1263,7 @@ ptllnd_rx_done(ptllnd_rx_t *rx)
                        plp->plp_sent_credits,
                        plni->plni_peer_credits + plp->plp_lazy_credits, rx);
 
-        ptllnd_check_sends(rx->rx_peer);
+        ptllnd_check_sends(plp);
 
         LASSERT (plni->plni_nrxs > 0);
         plni->plni_nrxs--;
@@ -1337,7 +1366,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
         msg_version = flip ? __swab16(msg->ptlm_version) : msg->ptlm_version;
 
         if (msg_version != PTLLND_MSG_VERSION) {
-                CERROR("Bad protocol version %04x from %s: %04x expected\n", 
+                CERROR("Bad protocol version %04x from %s: %04x expected\n",
                        (__u32)msg_version, ptllnd_ptlid2str(initiator), PTLLND_MSG_VERSION);
 
                 if (plni->plni_abort_on_protocol_mismatch)
@@ -1366,7 +1395,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
 
         msg->ptlm_version = msg_version;
         msg->ptlm_cksum = msg_cksum;
-        
+
         if (flip) {
                 /* NB stamps are opaque cookies */
                 __swab32s(&msg->ptlm_nob);
@@ -1375,7 +1404,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
                 __swab32s(&msg->ptlm_srcpid);
                 __swab32s(&msg->ptlm_dstpid);
         }
-        
+
         srcid.nid = msg->ptlm_srcnid;
         srcid.pid = msg->ptlm_srcpid;
 
@@ -1387,19 +1416,19 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
         }
 
         if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) {
-                CERROR("NAK from %s (%s)\n", 
+                CERROR("NAK from %s (%s)\n",
                        libcfs_id2str(srcid),
                        ptllnd_ptlid2str(initiator));
 
                 if (plni->plni_dump_on_nak)
                         ptllnd_dump_debug(ni, srcid);
-                
+
                 if (plni->plni_abort_on_nak)
                         abort();
-                
+
                 return;
         }
-        
+
         if (msg->ptlm_dstnid != ni->ni_nid ||
             msg->ptlm_dstpid != the_lnet.ln_pid) {
                 CERROR("Bad dstid %s (%s expected) from %s\n",
@@ -1459,7 +1488,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
                         __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size);
                 }
                 break;
-                
+
         case PTLLND_MSG_TYPE_NOOP:
                 break;
 
@@ -1509,19 +1538,16 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
         if (plp->plp_sent_credits == 0) {
                 CERROR("%s[%d/%d+%d(%d)]: unexpected message\n",
                        libcfs_id2str(plp->plp_id),
-                       plp->plp_credits, plp->plp_outstanding_credits, 
+                       plp->plp_credits, plp->plp_outstanding_credits,
                        plp->plp_sent_credits,
                        plni->plni_peer_credits + plp->plp_lazy_credits);
                 return;
         }
         plp->plp_sent_credits--;
-        
+
         /* No check for credit overflow - the peer may post new buffers after
          * the startup handshake. */
-        if (msg->ptlm_credits > 0) {
-                plp->plp_credits += msg->ptlm_credits;
-                ptllnd_check_sends(plp);
-        }
+        plp->plp_credits += msg->ptlm_credits;
 
         /* All OK so far; assume the message is good... */
 
@@ -1551,6 +1577,9 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
                 break;
         }
 
+        if (msg->ptlm_credits > 0)
+                ptllnd_check_sends(plp);
+
         ptllnd_peer_decref(plp);
 }
 
@@ -1580,7 +1609,7 @@ ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event)
                 /* Portals can't force message alignment - someone sending an
                  * odd-length message could misalign subsequent messages */
                 if ((event->mlength & 7) != 0) {
-                        CERROR("Message from %s has odd length %llu: "
+                        CERROR("Message from %s has odd length %u: "
                                "probable version incompatibility\n",
                                ptllnd_ptlid2str(event->initiator),
                                event->mlength);
@@ -1655,7 +1684,7 @@ ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event)
         LASSERT (!isreq != !isbulk);            /* always one and only 1 match */
 
         PTLLND_HISTORY("%s[%d/%d+%d(%d)]: TX done %p %s%s",
-                       libcfs_id2str(tx->tx_peer->plp_id), 
+                       libcfs_id2str(tx->tx_peer->plp_id),
                        tx->tx_peer->plp_credits,
                        tx->tx_peer->plp_outstanding_credits,
                        tx->tx_peer->plp_sent_credits,
@@ -1728,18 +1757,19 @@ ptllnd_tx_t *
 ptllnd_find_timed_out_tx(ptllnd_peer_t *peer)
 {
         time_t            now = cfs_time_current_sec();
-        struct list_head *tmp;
+        ptllnd_tx_t *tx;
+
+        list_for_each_entry (tx, &peer->plp_txq, tx_list) {
+                if (tx->tx_deadline < now)
+                        return tx;
+        }
 
-        list_for_each(tmp, &peer->plp_txq) {
-                ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-                
+        list_for_each_entry (tx, &peer->plp_noopq, tx_list) {
                 if (tx->tx_deadline < now)
                         return tx;
         }
-        
-        list_for_each(tmp, &peer->plp_activeq) {
-                ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-                
+
+        list_for_each_entry (tx, &peer->plp_activeq, tx_list) {
                 if (tx->tx_deadline < now)
                         return tx;
         }
@@ -1751,10 +1781,10 @@ void
 ptllnd_check_peer(ptllnd_peer_t *peer)
 {
         ptllnd_tx_t *tx = ptllnd_find_timed_out_tx(peer);
-        
+
         if (tx == NULL)
                 return;
-        
+
         CERROR("%s: timed out\n", libcfs_id2str(peer->plp_id));
         ptllnd_close_peer(peer, -ETIMEDOUT);
 }
@@ -1788,11 +1818,11 @@ ptllnd_watchdog (lnet_ni_t *ni, time_t now)
 
         for (i = 0; i < chunk; i++) {
                 hashlist = &plni->plni_peer_hash[plni->plni_watchdog_peeridx];
-                
+
                 list_for_each_safe(tmp, nxt, hashlist) {
                         ptllnd_check_peer(list_entry(tmp, ptllnd_peer_t, plp_list));
                 }
-                
+
                 plni->plni_watchdog_peeridx = (plni->plni_watchdog_peeridx + 1) %
                                               plni->plni_peer_hash_size;
         }
@@ -1811,7 +1841,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds)
         struct timeval         then;
         struct timeval         now;
         struct timeval         deadline;
-        
+
         ptllnd_ni_t   *plni = ni->ni_data;
         ptllnd_tx_t   *tx;
         ptl_event_t    event;
@@ -1841,7 +1871,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds)
 
         for (;;) {
                 gettimeofday(&then, NULL);
-                
+
                 rc = PtlEQPoll(&plni->plni_eqh, 1, timeout, &event, &which);
 
                 gettimeofday(&now, NULL);
@@ -1862,7 +1892,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds)
                                 ptllnd_watchdog(ni, now.tv_sec);
                                 LASSERT (now.tv_sec < plni->plni_watchdog_nextt);
                         }
-                        
+
                         if (now.tv_sec > deadline.tv_sec || /* timeout expired */
                             (now.tv_sec == deadline.tv_sec &&
                              now.tv_usec >= deadline.tv_usec))
@@ -1878,7 +1908,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds)
 
                         continue;
                 }
-                
+
                 LASSERT (rc == PTL_OK || rc == PTL_EQ_DROPPED);
 
                 if (rc == PTL_EQ_DROPPED)
index 88272b0..6442e09 100644 (file)
@@ -40,7 +40,7 @@
 
 #define __USE_FILE_OFFSET64
 #ifndef _GNU_SOURCE
-#define  _GNU_SOURCE
+#define _GNU_SOURCE
 #endif
 
 #include <stdio.h>
@@ -89,7 +89,7 @@ static const char *libcfs_debug_subsystems[] =
          "pinger", "filter", "", "echo",
          "ldlm", "lov", "lquota", "",
          "", "", "", "lmv",
-         "", "sec", "gss", "", 
+         "", "sec", "gss", "",
          "mgc", "mgs", "fid", "fld", NULL};
 static const char *libcfs_debug_masks[] =
         {"trace", "inode", "super", "ext2",
@@ -280,10 +280,11 @@ static int applymask(char* procpath, int value)
         if (rc != 0) {
                 fprintf(stderr, "Write to %s failed: %s\n",
                         procpath, strerror(errno));
-                return rc;
         }
+
         dbg_close_ctlhandle(fd);
-        return 0;
+
+        return rc;
 }
 
 static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
@@ -389,7 +390,7 @@ static int add_rec(struct dbg_line *line, struct dbg_line ***linevp, int *lenp,
                 *linevp = linev;
                 *lenp = nlen;
         }
-        linev[used] = line; 
+        linev[used] = line;
         return 1;
 }
 
@@ -456,10 +457,10 @@ static int parse_buffer(FILE *in, FILE *out)
                 line->text = p;
 
                 if (!add_rec(line, &linev, &linev_len, kept)) {
-                        fprintf(stderr, "malloc failed; printing accumulated " 
+                        fprintf(stderr, "malloc failed; printing accumulated "
                                 "records and exiting.\n");
                         break;
-                }        
+                }
                 kept++;
         }
 
@@ -499,7 +500,7 @@ int jt_dbg_debug_kernel(int argc, char **argv)
                 strcpy(filename, argv[1]);
         else
                 sprintf(filename, "/tmp/lustre-log."CFS_TIME_T".%u",
-                       time(NULL),getpid());
+                        time(NULL),getpid());
 
         if (stat(filename, &st) == 0 && S_ISREG(st.st_mode))
                 unlink(filename);
@@ -515,7 +516,7 @@ int jt_dbg_debug_kernel(int argc, char **argv)
         if (rc != 0) {
                 fprintf(stderr, "write(%s) failed: %s\n", filename,
                         strerror(errno));
-                close(fd);
+                dbg_close_ctlhandle(fd);
                 return 1;
         }
         dbg_close_ctlhandle(fd);