Whamcloud - gitweb
b=17167 libcfs: ensure all libcfs exported symbols to have cfs_ prefix
[fs/lustre-release.git] / lnet / ulnds / ptllnd / ptllnd.c
index 92a436f..0e378b5 100644 (file)
@@ -1,20 +1,41 @@
-
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- *   Author: Eric Barton <eeb@bartonsoftware.com>
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
  *
- *   This file is confidential source code owned by Cluster File Systems.
- *   No viewing, modification, compilation, redistribution, or any other
- *   form of use is permitted except through a signed license agreement.
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  *
- *   If you have not signed such an agreement, then you have no rights to
- *   this file.  Please destroy it immediately and contact CFS.
+ * lnet/ulnds/ptllnd/ptllnd.c
  *
+ * Author: Eric Barton <eeb@bartonsoftware.com>
  */
 
 #include "ptllnd.h"
@@ -23,129 +44,128 @@ lnd_t               the_ptllnd = {
         .lnd_type       = PTLLND,
         .lnd_startup    = ptllnd_startup,
         .lnd_shutdown   = ptllnd_shutdown,
-       .lnd_ctl        = ptllnd_ctl,
+        .lnd_ctl        = ptllnd_ctl,
         .lnd_send       = ptllnd_send,
         .lnd_recv       = ptllnd_recv,
         .lnd_eager_recv = ptllnd_eager_recv,
-        .lnd_notify     = ptllnd_notify,
         .lnd_wait       = ptllnd_wait,
-       .lnd_setasync   = ptllnd_setasync,
+        .lnd_setasync   = ptllnd_setasync,
 };
 
 static int ptllnd_ni_count = 0;
 
-static struct list_head ptllnd_idle_history;
-static struct list_head ptllnd_history_list;
+static cfs_list_t ptllnd_idle_history;
+static cfs_list_t ptllnd_history_list;
 
 void
 ptllnd_history_fini(void)
 {
-       ptllnd_he_t *he;
-
-       while (!list_empty(&ptllnd_idle_history)) {
-               he = list_entry(ptllnd_idle_history.next,
-                               ptllnd_he_t, he_list);
-               
-               list_del(&he->he_list);
-               LIBCFS_FREE(he, sizeof(*he));
-       }
-       
-       while (!list_empty(&ptllnd_history_list)) {
-               he = list_entry(ptllnd_history_list.next,
-                               ptllnd_he_t, he_list);
-               
-               list_del(&he->he_list);
-               LIBCFS_FREE(he, sizeof(*he));
-       }
+        ptllnd_he_t *he;
+
+        while (!cfs_list_empty(&ptllnd_idle_history)) {
+                he = cfs_list_entry(ptllnd_idle_history.next,
+                                    ptllnd_he_t, he_list);
+
+                cfs_list_del(&he->he_list);
+                LIBCFS_FREE(he, sizeof(*he));
+        }
+
+        while (!cfs_list_empty(&ptllnd_history_list)) {
+                he = cfs_list_entry(ptllnd_history_list.next,
+                                    ptllnd_he_t, he_list);
+
+                cfs_list_del(&he->he_list);
+                LIBCFS_FREE(he, sizeof(*he));
+        }
 }
 
 int
 ptllnd_history_init(void)
 {
-       int          i;
-       ptllnd_he_t *he;
-       int          n;
-       int          rc;
-       
-       CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
-       CFS_INIT_LIST_HEAD(&ptllnd_history_list);
-       
-       rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
-       if (rc != 0)
-               return rc;
-       
-       for (i = 0; i < n; i++) {
-               LIBCFS_ALLOC(he, sizeof(*he));
-               if (he == NULL) {
-                       ptllnd_history_fini();
-                       return -ENOMEM;
-               }
-               
-               list_add(&he->he_list, &ptllnd_idle_history);
-       }
-
-       PTLLND_HISTORY("Init");
-
-       return 0;
+        int          i;
+        ptllnd_he_t *he;
+        int          n;
+        int          rc;
+
+        CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
+        CFS_INIT_LIST_HEAD(&ptllnd_history_list);
+
+        rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
+        if (rc != 0)
+                return rc;
+
+        for (i = 0; i < n; i++) {
+                LIBCFS_ALLOC(he, sizeof(*he));
+                if (he == NULL) {
+                        ptllnd_history_fini();
+                        return -ENOMEM;
+                }
+
+                cfs_list_add(&he->he_list, &ptllnd_idle_history);
+        }
+
+        PTLLND_HISTORY("Init");
+
+        return 0;
 }
 
 void
 ptllnd_history(const char *fn, const char *file, const int line,
-              const char *fmt, ...)
+               const char *fmt, ...)
 {
-       static int     seq;
-       
+        static int     seq;
+
         va_list        ap;
-       ptllnd_he_t   *he;
-       
-       if (!list_empty(&ptllnd_idle_history)) {
-               he = list_entry(ptllnd_idle_history.next,
-                               ptllnd_he_t, he_list);
-       } else if (!list_empty(&ptllnd_history_list)) {
-               he = list_entry(ptllnd_history_list.next,
-                               ptllnd_he_t, he_list);
-       } else {
-               return;
-       }
-
-       list_del(&he->he_list);
-       list_add_tail(&he->he_list, &ptllnd_history_list);
-
-       he->he_seq = seq++;
-       he->he_fn = fn;
-       he->he_file = file;
-       he->he_line = line;
-       gettimeofday(&he->he_time, NULL);
-       
-       va_start(ap, fmt);
-       vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
-       va_end(ap);
+        ptllnd_he_t   *he;
+
+        if (!cfs_list_empty(&ptllnd_idle_history)) {
+                he = cfs_list_entry(ptllnd_idle_history.next,
+                                    ptllnd_he_t, he_list);
+        } else if (!cfs_list_empty(&ptllnd_history_list)) {
+                he = cfs_list_entry(ptllnd_history_list.next,
+                                    ptllnd_he_t, he_list);
+        } else {
+                return;
+        }
+
+        cfs_list_del(&he->he_list);
+        cfs_list_add_tail(&he->he_list, &ptllnd_history_list);
+
+        he->he_seq = seq++;
+        he->he_fn = fn;
+        he->he_file = file;
+        he->he_line = line;
+        gettimeofday(&he->he_time, NULL);
+
+        va_start(ap, fmt);
+        vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
+        va_end(ap);
 }
 
 void
 ptllnd_dump_history(void)
 {
-       ptllnd_he_t    *he;
+        ptllnd_he_t    *he;
+
+        PTLLND_HISTORY("dumping...");
 
-       PTLLND_HISTORY("dumping...");
-       
-       while (!list_empty(&ptllnd_history_list)) {
-               he = list_entry(ptllnd_history_list.next,
-                               ptllnd_he_t, he_list);
+        while (!cfs_list_empty(&ptllnd_history_list)) {
+                he = cfs_list_entry(ptllnd_history_list.next,
+                                ptllnd_he_t, he_list);
 
-               list_del(&he->he_list);
-               
-               CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
-                      (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
-                      he->he_file, he->he_line, he->he_fn, he->he_msg);
+                cfs_list_del(&he->he_list);
 
-               list_add_tail(&he->he_list, &ptllnd_idle_history);
-       }
+                CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
+                       (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
+                       he->he_file, he->he_line, he->he_fn, he->he_msg);
+
+                cfs_list_add_tail(&he->he_list, &ptllnd_idle_history);
+        }
 
-       PTLLND_HISTORY("complete");
+        PTLLND_HISTORY("complete");
 }
 
-void 
+void
 ptllnd_assert_wire_constants (void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
@@ -251,6 +271,11 @@ ptllnd_get_tunables(lnet_ni_t *ni)
         int          rc;
         int          temp;
 
+        /*  Other tunable defaults depend on this */
+        rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0);
+        if (rc != 0)
+                return rc;
+
         rc = ptllnd_parse_int_tunable(&plni->plni_portal,
                                       "PTLLND_PORTAL", PTLLND_PORTAL);
         if (rc != 0)
@@ -266,6 +291,10 @@ ptllnd_get_tunables(lnet_ni_t *ni)
                                       "PTLLND_PEERCREDITS", PTLLND_PEERCREDITS);
         if (rc != 0)
                 return rc;
+        if (plni->plni_peer_credits > PTLLND_MSG_MAX_CREDITS) {
+                CERROR("PTLLND_PEERCREDITS must be <= %d\n", PTLLND_MSG_MAX_CREDITS);
+                return -EINVAL;
+        }
 
         rc = ptllnd_parse_int_tunable(&max_msg_size,
                                       "PTLLND_MAX_MSG_SIZE",
@@ -274,56 +303,76 @@ ptllnd_get_tunables(lnet_ni_t *ni)
                 return rc;
 
         rc = ptllnd_parse_int_tunable(&msgs_per_buffer,
-                                      "PTLLND_MSGS_PER_BUFFER",
-                                      PTLLND_MSGS_PER_BUFFER);
+                                      "PTLLND_MSGS_PER_BUFFER", 64);
         if (rc != 0)
                 return rc;
 
         rc = ptllnd_parse_int_tunable(&plni->plni_msgs_spare,
-                                      "PTLLND_MSGS_SPARE",
-                                      PTLLND_MSGS_SPARE);
+                                      "PTLLND_MSGS_SPARE", 256);
         if (rc != 0)
                 return rc;
 
         rc = ptllnd_parse_int_tunable(&plni->plni_peer_hash_size,
-                                      "PTLLND_PEER_HASH_SIZE",
-                                      PTLLND_PEER_HASH_SIZE);
+                                      "PTLLND_PEER_HASH_SIZE", 101);
         if (rc != 0)
                 return rc;
 
 
         rc = ptllnd_parse_int_tunable(&plni->plni_eq_size,
-                                      "PTLLND_EQ_SIZE", PTLLND_EQ_SIZE);
+                                      "PTLLND_EQ_SIZE", 1024);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
+                                      "PTLLND_CHECKSUM", 0);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
+                                      "PTLLND_TX_HISTORY",
+                                      plni->plni_debug ? 1024 : 0);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch,
+                                      "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1);
         if (rc != 0)
                 return rc;
 
-       rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
-                                     "PTLLND_CHECKSUM", 0);
-       if (rc != 0)
-               return rc;
+        rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
+                                      "PTLLND_ABORT_ON_NAK", 0);
+        if (rc != 0)
+                return rc;
 
-       rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
-                                     "PTLLND_TX_HISTORY", PTLLND_TX_HISTORY);
-       if (rc != 0)
-               return rc;
+        rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
+                                      "PTLLND_DUMP_ON_NAK", plni->plni_debug);
+        if (rc != 0)
+                return rc;
 
-       rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
-                                     "PTLLND_ABORT_ON_NAK",
-                                     PTLLND_ABORT_ON_NAK);
-       if (rc != 0)
-               return rc;
+        rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval,
+                                      "PTLLND_WATCHDOG_INTERVAL", 1);
+        if (rc != 0)
+                return rc;
+        if (plni->plni_watchdog_interval <= 0)
+                plni->plni_watchdog_interval = 1;
 
-       rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
-                                     "PTLLND_DUMP_ON_NAK",
-                                     PTLLND_DUMP_ON_NAK);
-       if (rc != 0)
-               return rc;
+        rc = ptllnd_parse_int_tunable(&plni->plni_timeout,
+                                      "PTLLND_TIMEOUT", 50);
+        if (rc != 0)
+                return rc;
+
+        rc = ptllnd_parse_int_tunable(&plni->plni_long_wait,
+                                      "PTLLND_LONG_WAIT",
+                                      plni->plni_debug ? 5 : plni->plni_timeout);
+        if (rc != 0)
+                return rc;
+        plni->plni_long_wait *= 1000;           /* convert to mS */
 
         plni->plni_max_msg_size = max_msg_size & ~7;
         if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
                 plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
-       CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
-       CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
+        CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
+        CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
 
         plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer;
 
@@ -364,7 +413,7 @@ ptllnd_create_buffer (lnet_ni_t *ni)
                 return NULL;
         }
 
-        list_add(&buf->plb_list, &plni->plni_buffers);
+        cfs_list_add(&buf->plb_list, &plni->plni_buffers);
         plni->plni_nbuffers++;
 
         return buf;
@@ -378,7 +427,7 @@ ptllnd_destroy_buffer (ptllnd_buffer_t *buf)
         LASSERT (!buf->plb_posted);
 
         plni->plni_nbuffers--;
-        list_del(&buf->plb_list);
+        cfs_list_del(&buf->plb_list);
         LIBCFS_FREE(buf->plb_buffer, plni->plni_buffer_size);
         LIBCFS_FREE(buf, sizeof(*buf));
 }
@@ -395,9 +444,9 @@ ptllnd_size_buffers (lnet_ni_t *ni, int delta)
         CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
         CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
 
-       plni->plni_nmsgs += delta;
-       LASSERT(plni->plni_nmsgs >= 0);
-       
+        plni->plni_nmsgs += delta;
+        LASSERT(plni->plni_nmsgs >= 0);
+
         nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare;
 
         nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) /
@@ -431,34 +480,35 @@ ptllnd_destroy_buffers (lnet_ni_t *ni)
 {
         ptllnd_ni_t       *plni = ni->ni_data;
         ptllnd_buffer_t   *buf;
-        struct list_head  *tmp;
-        struct list_head  *nxt;
+        cfs_list_t        *tmp;
+        cfs_list_t        *nxt;
 
         CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
         CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
 
-        list_for_each_safe(tmp, nxt, &plni->plni_buffers) {
-                buf = list_entry(tmp, ptllnd_buffer_t, plb_list);
+        cfs_list_for_each_safe(tmp, nxt, &plni->plni_buffers) {
+                buf = cfs_list_entry(tmp, ptllnd_buffer_t, plb_list);
 
                 //CDEBUG(D_NET, "buf=%p posted=%d\n",buf,buf->plb_posted);
 
                 LASSERT (plni->plni_nbuffers > 0);
                 if (buf->plb_posted) {
-                       time_t   start = cfs_time_current_sec();
-                       int      w = PTLLND_WARN_LONG_WAIT;
-                       
+                        time_t   start = cfs_time_current_sec();
+                        int      w = plni->plni_long_wait;
+
                         LASSERT (plni->plni_nposted_buffers > 0);
 
 #ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
                         (void) PtlMDUnlink(buf->plb_md);
 
-                       while (buf->plb_posted) {
-                               if (cfs_time_current_sec() > start + w) {
-                                       CWARN("Waited %ds to unlink buffer\n", w);
-                                       w *= 2;
-                               }
-                               ptllnd_wait(ni, w*1000);
-                       }
+                        while (buf->plb_posted) {
+                                if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+                                        CWARN("Waited %ds to unlink buffer\n",
+                                              (int)(cfs_time_current_sec() - start));
+                                        w *= 2;
+                                }
+                                ptllnd_wait(ni, w);
+                        }
 #else
                         while (buf->plb_posted) {
                                 rc = PtlMDUnlink(buf->plb_md);
@@ -468,11 +518,12 @@ ptllnd_destroy_buffers (lnet_ni_t *ni)
                                         break;
                                 }
                                 LASSERT (rc == PTL_MD_IN_USE);
-                               if (cfs_time_current_sec() > start + w) {
-                                       CWARN("Waited %ds to unlink buffer\n", w);
-                                       w *= 2;
-                               }
-                               ptllnd_wait(ni, w*1000);
+                                if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+                                        CWARN("Waited %ds to unlink buffer\n",
+                                              (int)(cfs_time_current_sec() - start)); /* %d takes int, not time_t */
+                                        w *= 2;
+                                }
+                                ptllnd_wait(ni, w);
                         }
 #endif
                 }
@@ -517,7 +568,7 @@ ptllnd_destroy_peer_hash (lnet_ni_t *ni)
         LASSERT( plni->plni_npeers == 0);
 
         for (i = 0; i < plni->plni_peer_hash_size; i++)
-                LASSERT (list_empty(&plni->plni_peer_hash[i]));
+                LASSERT (cfs_list_empty(&plni->plni_peer_hash[i]));
 
         LIBCFS_FREE(plni->plni_peer_hash,
                     plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash));
@@ -531,9 +582,9 @@ ptllnd_close_peers (lnet_ni_t *ni)
         int             i;
 
         for (i = 0; i < plni->plni_peer_hash_size; i++)
-                while (!list_empty(&plni->plni_peer_hash[i])) {
-                        plp = list_entry(plni->plni_peer_hash[i].next,
-                                         ptllnd_peer_t, plp_list);
+                while (!cfs_list_empty(&plni->plni_peer_hash[i])) {
+                        plp = cfs_list_entry(plni->plni_peer_hash[i].next,
+                                             ptllnd_peer_t, plp_list);
 
                         ptllnd_close_peer(plp, 0);
                 }
@@ -542,14 +593,14 @@ ptllnd_close_peers (lnet_ni_t *ni)
 int
 ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 {
-       switch (cmd) {
-       case IOC_LIBCFS_DEBUG_PEER:
-               ptllnd_debug_peer(ni, *((lnet_process_id_t *)arg));
-               return 0;
-               
-       default:
-               return -EINVAL;
-       }
+        switch (cmd) {
+        case IOC_LIBCFS_DEBUG_PEER:
+                ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg));
+                return 0;
+
+        default:
+                return -EINVAL;
+        }
 }
 
 __u64
@@ -567,24 +618,25 @@ ptllnd_shutdown (lnet_ni_t *ni)
 {
         ptllnd_ni_t *plni = ni->ni_data;
         int          rc;
-       time_t       start = cfs_time_current_sec();
-       int          w = PTLLND_WARN_LONG_WAIT;
+        time_t       start = cfs_time_current_sec();
+        int          w = plni->plni_long_wait;
 
         LASSERT (ptllnd_ni_count == 1);
-       plni->plni_max_tx_history = 0;
+        plni->plni_max_tx_history = 0;
 
-       ptllnd_cull_tx_history(plni);
+        ptllnd_cull_tx_history(plni);
 
         ptllnd_close_peers(ni);
         ptllnd_destroy_buffers(ni);
 
         while (plni->plni_npeers > 0) {
-               if (cfs_time_current_sec() > start + w) {
-                       CWARN("Waited %ds for peers to shutdown\n", w);
-                       w *= 2;
-               }
-                ptllnd_wait(ni, w*1000);
-       }
+                if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+                        CWARN("Waited %ds for peers to shutdown\n",
+                              (int)(cfs_time_current_sec() - start));
+                        w *= 2;
+                }
+                ptllnd_wait(ni, w);
+        }
 
         LASSERT (plni->plni_ntxs == 0);
         LASSERT (plni->plni_nrxs == 0);
@@ -606,9 +658,9 @@ ptllnd_startup (lnet_ni_t *ni)
         ptllnd_ni_t *plni;
         int          rc;
 
-       /* could get limits from portals I guess... */
-       ni->ni_maxtxcredits =
-       ni->ni_peertxcredits = 1000;
+        /* could get limits from portals I guess... */
+        ni->ni_maxtxcredits =
+        ni->ni_peertxcredits = 1000;
 
         if (ptllnd_ni_count != 0) {
                 CERROR("Can't have > 1 instance of ptllnd\n");
@@ -617,12 +669,12 @@ ptllnd_startup (lnet_ni_t *ni)
 
         ptllnd_ni_count++;
 
-       rc = ptllnd_history_init();
-       if (rc != 0) {
-               CERROR("Can't init history\n");
-               goto failed0;
-       }
-       
+        rc = ptllnd_history_init();
+        if (rc != 0) {
+                CERROR("Can't init history\n");
+                goto failed0;
+        }
+
         LIBCFS_ALLOC(plni, sizeof(*plni));
         if (plni == NULL) {
                 CERROR("Can't allocate ptllnd state\n");
@@ -635,7 +687,9 @@ ptllnd_startup (lnet_ni_t *ni)
         plni->plni_stamp = ptllnd_get_timestamp();
         plni->plni_nrxs = 0;
         plni->plni_ntxs = 0;
-       plni->plni_ntx_history = 0;
+        plni->plni_ntx_history = 0;
+        plni->plni_watchdog_peeridx = 0;
+        plni->plni_watchdog_nextt = cfs_time_current_sec();
         CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs);
         CFS_INIT_LIST_HEAD(&plni->plni_tx_history);
 
@@ -661,7 +715,8 @@ ptllnd_startup (lnet_ni_t *ni)
         rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid,
                        NULL, NULL, &plni->plni_nih);
         if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                CERROR("PtlNIInit failed: %d\n", rc);
+                CERROR("PtlNIInit failed: %s(%d)\n",
+                       ptllnd_errtype2str(rc), rc);
                 rc = -ENODEV;
                 goto failed2;
         }
@@ -669,7 +724,8 @@ ptllnd_startup (lnet_ni_t *ni)
         rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size,
                         PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
         if (rc != PTL_OK) {
-                CERROR("PtlEQAlloc failed: %d\n", rc);
+                CERROR("PtlEQAlloc failed: %s(%d)\n",
+                       ptllnd_errtype2str(rc), rc);
                 rc = -ENODEV;
                 goto failed3;
         }
@@ -677,14 +733,14 @@ ptllnd_startup (lnet_ni_t *ni)
         /*
          * Fetch the Portals NID
          */
-        if(rc != PtlGetId(plni->plni_nih,&plni->plni_portals_id)){
-                CERROR ("PtlGetID failed : %d\n", rc);
+        rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
+        if (rc != PTL_OK) {
+                CERROR ("PtlGetID failed : %s(%d)\n",
+                        ptllnd_errtype2str(rc), rc);
                 rc = -EINVAL;
                 goto failed4;
         }
 
-        CDEBUG(D_NET, "lnet nid=" LPX64 " (passed in)\n",ni->ni_nid);
-
         /*
          * Create the new NID.  Based on the LND network type
          * and the lower ni's address data.
@@ -700,7 +756,7 @@ ptllnd_startup (lnet_ni_t *ni)
         if (rc != 0)
                 goto failed4;
 
-       return 0;
+        return 0;
 
  failed4:
         ptllnd_destroy_buffers(ni);
@@ -712,7 +768,7 @@ ptllnd_startup (lnet_ni_t *ni)
  failed1:
         LIBCFS_FREE(plni, sizeof(*plni));
  failed0:
-       ptllnd_history_fini();
+        ptllnd_history_fini();
         ptllnd_ni_count--;
         CDEBUG(D_NET, "<<< rc=%d\n",rc);
         return rc;