Whamcloud - gitweb
Land b_release_1_4_3 onto HEAD (20050619_0305)
author adilger <adilger>
Sun, 19 Jun 2005 09:18:22 +0000 (09:18 +0000)
committer adilger <adilger>
Sun, 19 Jun 2005 09:18:22 +0000 (09:18 +0000)
b=6411 : enable rate-limiting of console error messages, and some console
         errors now go only to the kernel log.  Use CERROR/CWARN where
         appropriate to allow rate-limiting of these messages.
b=1693 : add /proc/sys/portals/catastrophe entry which will report if
         that node has previously LBUGged.

20 files changed:
lnet/ChangeLog
lnet/include/libcfs/darwin/kp30.h
lnet/include/libcfs/libcfs.h
lnet/include/libcfs/linux/kp30.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/gmlnd/gmlnd_comm.c
lnet/klnds/gmlnd/gmlnd_module.c
lnet/klnds/gmlnd/gmlnd_utils.c
lnet/klnds/iiblnd/iiblnd_cb.c
lnet/klnds/openiblnd/openiblnd.c
lnet/klnds/openiblnd/openiblnd_cb.c
lnet/klnds/ralnd/ralnd.c
lnet/klnds/ralnd/ralnd_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd_cb.c
lnet/libcfs/darwin/darwin-proc.c
lnet/libcfs/debug.c
lnet/libcfs/linux/linux-proc.c
lnet/utils/acceptor.c

index 27ad0b2..3d8fcc8 100644 (file)
@@ -1,3 +1,20 @@
+2005-06-02  Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.4.3
+       * bug fixes
+
+Severity   : major
+Frequency  : occasional (large-scale events, cluster reboot, network failure)
+Bugzilla   : 6411
+Description: too many error messages on console obscure actual problem and
+             can slow down/panic server, or cause recovery to fail repeatedly
+Details    : enable rate-limiting of console error messages, and some messages
+             that were console errors now only go to the kernel log
+
+Severity   : enhancement
+Bugzilla   : 1693
+Description: add /proc/sys/portals/catastrophe entry which will report if
+             that node has previously LBUGged
+
 2005-04-06  Cluster File Systems, Inc. <info@clusterfs.com>
        * bugs
        - update gmnal to use PTL_MTU, fix module refcounting (b=5786)
index 7f765e1..5c1acc4 100644 (file)
@@ -32,7 +32,7 @@
 #define LASSERT_SPIN_LOCKED(lock) do {} while(0)
 #endif
 
-#define LBUG_WITH_LOC(file, func, line)         do {} while(0)
+#define LBUG_WITH_LOC(file, func, line)         portals_catastrophe = 1
 
 /* --------------------------------------------------------------------- */
 
index eca3488..6f3ee42 100644 (file)
@@ -38,6 +38,9 @@ extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
 
+/* Has there been an LBUG? */
+extern unsigned int portals_catastrophe;
+
 /*
  * struct ptldebug_header is defined in libcfs/<os>/libcfs.h
  */
index a4e0b21..d2329ba 100644 (file)
@@ -93,6 +93,7 @@ static inline void our_cond_resched(void)
 #define LBUG_WITH_LOC(file, func, line)                                 \
 do {                                                                    \
         CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n");       \
+        portals_catastrophe = 1;                                        \
         portals_debug_dumplog();                                        \
         portals_run_lbug_upcall(file, func, line);                      \
         panic("LBUG");                                                  \
@@ -101,6 +102,7 @@ do {                                                                    \
 #define LBUG_WITH_LOC(file, func, line)                                 \
 do {                                                                    \
         CEMERG("LBUG\n");                                               \
+        portals_catastrophe = 1;                                        \
         portals_debug_dumpstack(NULL);                                  \
         portals_debug_dumplog();                                        \
         portals_run_lbug_upcall(file, func, line);                      \
index a65272a..bf182b4 100644 (file)
@@ -134,7 +134,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
        PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
        if (!nal_data) {
-               CDEBUG(D_ERROR, "can't get memory\n");
+               CERROR("can't get memory\n");
                return(PTL_NO_SPACE);
        }       
        memset(nal_data, 0, sizeof(gmnal_data_t));
@@ -169,7 +169,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
         */
        CDEBUG(D_INFO, "Calling gm_init\n");
        if (gm_init() != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "call to gm_init failed\n");
+               CERROR("call to gm_init failed\n");
                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
                PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
@@ -187,29 +187,27 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
        CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status);
        if (gm_status == GM_SUCCESS) {
-               CDEBUG(D_INFO, "gm_open succeeded port[%p]\n", 
-                      nal_data->gm_port);
+               CDEBUG(D_INFO,"gm_open succeeded port[%p]\n",nal_data->gm_port);
        } else {
                switch(gm_status) {
                case(GM_INVALID_PARAMETER):
-                       CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n");
+                       CERROR("gm_open Failure. Invalid Parameter\n");
                        break;
                case(GM_BUSY):
-                       CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n");
+                       CERROR("gm_open Failure. GM Busy\n");
                        break;
                case(GM_NO_SUCH_DEVICE):
-                       CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
+                       CERROR("gm_open Failure. No such device\n");
                        break;
                case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
-                              "and driver\n");
+                       CERROR("gm_open Failure. Incompatile lib and driver\n");
                        break;
                case(GM_OUT_OF_MEMORY):
-                       CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
+                       CERROR("gm_open Failure. Out of Memory\n");
                        break;
                default:
-                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
-                              "code [%d]\n", gm_status);
+                       CERROR("gm_open Failure. Unknow error code [%d]\n",
+                               gm_status);
                        break;
                }       
                GMNAL_GM_LOCK(nal_data);
@@ -225,7 +223,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                        gm_min_size_for_length(gmnal_small_msg_size);
 
        if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) {
-               CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n");
+               CERROR("Failed to allocate small rx descriptors\n");
                gmnal_free_txd(nal_data);
                GMNAL_GM_LOCK(nal_data);
                gm_close(nal_data->gm_port);
@@ -255,7 +253,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
         *      Allocate pools of small tx buffers and descriptors
         */
        if (gmnal_alloc_txd(nal_data) != GMNAL_STATUS_OK) {
-               CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n");
+               CERROR("Failed to allocate small tx descriptors\n");
                GMNAL_GM_LOCK(nal_data);
                gm_close(nal_data->gm_port);
                gm_finalize();
@@ -285,7 +283,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
        if (gm_status != GM_SUCCESS) {
                gmnal_stop_rxthread(nal_data);
                gmnal_stop_ctthread(nal_data);
-               CDEBUG(D_ERROR, "can't determine node id\n");
+               CERROR("can't determine node id\n");
                gmnal_free_txd(nal_data);
                gmnal_free_srxd(nal_data);
                GMNAL_GM_LOCK(nal_data);
@@ -305,7 +303,7 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                                            &global_nid);
        GMNAL_GM_UNLOCK(nal_data);
        if (gm_status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "failed to obtain global id\n");
+               CERROR("failed to obtain global id\n");
                gmnal_stop_rxthread(nal_data);
                gmnal_stop_ctthread(nal_data);
                gmnal_free_txd(nal_data);
@@ -327,14 +325,14 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 */
         process_id.pid = requested_pid;
         process_id.nid = global_nid;
-        
+
        CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid);
        CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
-       
+
        CDEBUG(D_PORTALS, "calling lib_init\n");
-       if (lib_init(libnal, nal, process_id, 
+       if (lib_init(libnal, nal, process_id,
                      requested_limits, actual_limits) != PTL_OK) {
-               CDEBUG(D_ERROR, "lib_init failed\n");
+               CERROR("lib_init failed\n");
                gmnal_stop_rxthread(nal_data);
                gmnal_stop_ctthread(nal_data);
                gmnal_free_txd(nal_data);
@@ -343,10 +341,9 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
                gm_close(nal_data->gm_port);
                gm_finalize();
                GMNAL_GM_UNLOCK(nal_data);
-               PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
+               PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
                PORTAL_FREE(libnal, sizeof(lib_nal_t));
                return(PTL_FAIL);
-               
        }
 
        if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) {
index 6394c37..ddff6b9 100644 (file)
@@ -166,7 +166,7 @@ ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                "] nid["LPU64"]\n", niov, offset, len, nid);
        nal_data = libnal->libnal_data;
        if (!nal_data) {
-               CDEBUG(D_ERROR, "no nal_data\n");
+               CERROR("no nal_data\n");
                return(PTL_FAIL);
        } else {
                CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
@@ -205,7 +205,7 @@ ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
                               stxd,  len);
        } else {
-               CDEBUG(D_ERROR, "Large message send is not supported\n");
+               CERROR("Large message send is not supported\n");
                lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
                gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid,
@@ -230,7 +230,7 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private,
                LPSZ"] len["LPSZ"]\n", nid, kniov, offset, len);
        nal_data = libnal->libnal_data;
        if (!nal_data) {
-               CDEBUG(D_ERROR, "no nal_data\n");
+               CERROR("no nal_data\n");
                return(PTL_FAIL);
        } else {
                CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
@@ -292,7 +292,7 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private,
 
                PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
                iovec_dup = iovec;
-               CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
+               CERROR("Large message send it is not supported yet\n");
                PORTAL_FREE(iovec, kniov*sizeof(struct iovec));
                return(PTL_FAIL);
                for (i=0; i<kniov; i++) {
index 206d86b..60e5d67 100644 (file)
@@ -155,19 +155,16 @@ int gmnal_rx_thread(void *arg)
                buffer = we->buffer;
                switch(((gmnal_msghdr_t*)buffer)->type) {
                case(GMNAL_SMALL_MESSAGE):
-                       gmnal_pre_receive(nal_data, we, 
-                                          GMNAL_SMALL_MESSAGE);
-               break;  
+                       gmnal_pre_receive(nal_data, we, GMNAL_SMALL_MESSAGE);
+               break;
                case(GMNAL_LARGE_MESSAGE_INIT):
-                       gmnal_pre_receive(nal_data, we, 
-                                          GMNAL_LARGE_MESSAGE_INIT);
-               break;  
+                       gmnal_pre_receive(nal_data,we,GMNAL_LARGE_MESSAGE_INIT);
+               break;
                case(GMNAL_LARGE_MESSAGE_ACK):
-                       gmnal_pre_receive(nal_data, we, 
-                                          GMNAL_LARGE_MESSAGE_ACK);
-               break;  
+                       gmnal_pre_receive(nal_data, we,GMNAL_LARGE_MESSAGE_ACK);
+               break;
                default:
-                       CDEBUG(D_ERROR, "Unsupported message type\n");
+                       CERROR("Unsupported message type\n");
                        gmnal_rx_bad(nal_data, we, NULL);
                }
                PORTAL_FREE(we, sizeof(gmnal_rxtwe_t));
@@ -200,7 +197,7 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        ptl_hdr_t       *portals_hdr;
         int              rc;
 
-       CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n", 
+       CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n",
               nal_data, we, gmnal_type);
 
        buffer = we->buffer;
@@ -217,20 +214,19 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
               "type [%d], length [%d], buffer [%p]\n",
               snode, sport, type, length, buffer);
        CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
-              "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
+              "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
               gmnal_msghdr->magic, gmnal_msghdr->type);
        CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
-              "dest_node ["LPD64"]\n", portals_hdr->src_nid, 
+              "dest_node ["LPD64"]\n", portals_hdr->src_nid,
               portals_hdr->dest_nid);
 
-       
        /*
-        *      Get a receive descriptor for this message
+        *      Get a receive descriptor for this message
         */
        srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
        CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
        if (!srxd) {
-               CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
+               CERROR("Failed to get receive descriptor\n");
                 /* I think passing a NULL srxd to lib_parse will crash
                  * gmnal_recv() */
                 LBUG();
@@ -239,7 +235,7 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        }
 
        /*
-        *      no need to bother portals library with this
+        *      no need to bother portals library with this
         */
        if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) {
                gmnal_large_tx_ack_received(nal_data, srxd);
@@ -250,8 +246,8 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        srxd->type = gmnal_type;
        srxd->nsiov = gmnal_msghdr->niov;
        srxd->gm_source_node = gmnal_msghdr->sender_node_id;
-       
-       CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n", 
+
+       CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n",
               buffer+GMNAL_MSGHDR_SIZE);
        /*
         *      control passes to lib, which calls cb_recv 
@@ -306,7 +302,7 @@ gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd)
        if (srxd) {
                gmnal_rx_requeue_buffer(nal_data, srxd);
        } else {
-               CDEBUG(D_ERROR, "Can't find a descriptor for this buffer\n");
+               CERROR("Can't find a descriptor for this buffer\n");
                /*
                 *      get rid of it ?
                 */
@@ -334,7 +330,7 @@ gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie)
 
 
        if (!private) {
-               CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+               CERROR("gmnal_small_rx no context\n");
                lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
@@ -386,7 +382,7 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
               hdr->dest_nid, hdr->src_nid);
 
        if (!nal_data) {
-               CDEBUG(D_ERROR, "no nal_data\n");
+               CERROR("no nal_data\n");
                return(PTL_FAIL);
        } else {
                CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
@@ -397,7 +393,7 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                                            &local_nid);
        GMNAL_GM_UNLOCK(nal_data);
        if (gm_status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "Failed to obtain local id\n");
+               CERROR("Failed to obtain local id\n");
                return(PTL_FAIL);
        }
        CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
@@ -431,20 +427,20 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
 
 
        CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
-              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
-              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
               stxd->msg_size, global_nid, local_nid, stxd);
 
        GMNAL_GM_LOCK(nal_data);
        stxd->gm_priority = GM_LOW_PRIORITY;
        stxd->gm_target_node = local_nid;
-       gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, 
-                                     stxd->gm_size, stxd->msg_size, 
-                                     GM_LOW_PRIORITY, local_nid, 
+       gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer,
+                                     stxd->gm_size, stxd->msg_size,
+                                     GM_LOW_PRIORITY, local_nid,
                                      gmnal_small_tx_callback, (void*)stxd);
        GMNAL_GM_UNLOCK(nal_data);
        CDEBUG(D_INFO, "done\n");
-               
+
        return(PTL_OK);
 }
 
@@ -480,7 +476,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                               gm_status);
                        gnid = 0;
                }
-               CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s] to [%u]\n",
+               CERROR("Result of send stxd [%p] is [%s] to [%u]\n",
                       stxd, gmnal_gm_error(status), gnid);
        }
 
@@ -494,22 +490,20 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                /*
                 *      do a resend on the dropped ones
                 */
-                       CDEBUG(D_ERROR, "send stxd [%p] was dropped "
-                              "resending\n", context);
+                       CERROR("send stxd [%p] dropped, resending\n", context);
                        GMNAL_GM_LOCK(nal_data);
-                       gm_send_to_peer_with_callback(nal_data->gm_port, 
-                                                     stxd->buffer, 
-                                                     stxd->gm_size, 
-                                                     stxd->msg_size, 
-                                                     stxd->gm_priority, 
-                                                     stxd->gm_target_node, 
+                       gm_send_to_peer_with_callback(nal_data->gm_port,
+                                                     stxd->buffer,
+                                                     stxd->gm_size,
+                                                     stxd->msg_size,
+                                                     stxd->gm_priority,
+                                                     stxd->gm_target_node,
                                                      gmnal_small_tx_callback,
                                                      context);
                        GMNAL_GM_UNLOCK(nal_data);
-               
                return;
-               case(GM_TIMED_OUT):
-               case(GM_SEND_TIMED_OUT):
+               case(GM_TIMED_OUT):
+               case(GM_SEND_TIMED_OUT):
                /*
                 *      drop these ones
                 */
@@ -628,7 +622,7 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
                                              context);
                GMNAL_GM_UNLOCK(nal_data);
        } else {
-               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+               CERROR("send_to_peer status for stxd [%p] is "
                       "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
        }
 
@@ -669,7 +663,7 @@ gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
        if (libnal)
                nal_data = (gmnal_data_t*)libnal->libnal_data;
        else  {
-               CDEBUG(D_ERROR, "no libnal.\n");
+               CERROR("no libnal.\n");
                return(GMNAL_STATUS_FAIL);
        }
        
@@ -755,7 +749,7 @@ gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                                               iov->iov_base, iov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                       CERROR("gm_register_memory returns [%d][%s] "
                               "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               iov->iov_base, iov->iov_len);
@@ -784,7 +778,7 @@ gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                                            &local_nid);
        if (gm_status != GM_SUCCESS) {
                GMNAL_GM_UNLOCK(nal_data);
-               CDEBUG(D_ERROR, "Failed to obtain local id\n");
+               CERROR("Failed to obtain local id\n");
                gmnal_return_stxd(nal_data, stxd);
                /* TO DO deregister memory on failure */
                return(GMNAL_STATUS_FAIL);
@@ -795,9 +789,9 @@ gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                                      local_nid, gmnal_large_tx_callback, 
                                      (void*)stxd);
        GMNAL_GM_UNLOCK(nal_data);
-       
+
        CDEBUG(D_INFO, "done\n");
-               
+
        return(PTL_OK);
 }
 
@@ -837,7 +831,7 @@ gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                libnal, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
-               CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+               CERROR("gmnal_large_rx no context\n");
                lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
        }
@@ -880,21 +874,21 @@ gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
         if (nriov > 1)
                gm_bcopy(&riov[1], &srxd->riov[1], (nriov-1)*(sizeof(struct iovec)));
        srxd->nriov = nriov;
-        
+
         riov = srxd->riov;
        nriov_dup = nriov;
        riov_dup = riov;
        while(nriov--) {
-               CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", 
+               CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n",
                       riov->iov_base, riov->iov_len);
                GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_register_memory(nal_data->gm_port, 
+               gm_status = gm_register_memory(nal_data->gm_port,
                                               riov->iov_base, riov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
-                              "for memory [%p] len ["LPSZ"]\n", 
-                              gm_status, gmnal_gm_error(gm_status), 
+                       CERROR("gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n",
+                              gm_status, gmnal_gm_error(gm_status),
                               riov->iov_base, riov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
                        while (riov_dup != riov) {
@@ -918,9 +912,9 @@ gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
         *      now do gm_get to get the data
         */
        srxd->cookie = cookie;
-       if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer, 
+       if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer,
                              nriov_dup, riov_dup) != GMNAL_STATUS_OK) {
-               CDEBUG(D_ERROR, "can't get the data");
+               CERROR("can't get the data");
        }
 
        CDEBUG(D_INFO, "lgmanl_large_rx done\n");
@@ -949,7 +943,7 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 
        ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
        if (ncalls < 0) {
-               CDEBUG(D_ERROR, "there's something wrong with the iovecs\n");
+               CERROR("there's something wrong with the iovecs\n");
                return(GMNAL_STATUS_FAIL);
        }
        CDEBUG(D_INFO, "gmnal_remote_get ncalls [%d]\n", ncalls);
@@ -959,7 +953,7 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 
        ncalls = gmnal_copyiov(1, srxd, nsiov, siov, nriov, riov);
        if (ncalls < 0) {
-               CDEBUG(D_ERROR, "there's something wrong with the iovecs\n");
+               CERROR("there's something wrong with the iovecs\n");
                return(GMNAL_STATUS_FAIL);
        }
 
@@ -991,15 +985,15 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
        CDEBUG(D_TRACE, "copy[%d] nal_data[%p]\n", do_copy, nal_data);
        if (do_copy) {
                if (!nal_data) {
-                       CDEBUG(D_ERROR, "Bad args No nal_data\n");
+                       CERROR("Bad args No nal_data\n");
                        return(GMNAL_STATUS_FAIL);
                }
                GMNAL_GM_LOCK(nal_data);
-               if (gm_global_id_to_node_id(nal_data->gm_port, 
-                                           srxd->gm_source_node, 
+               if (gm_global_id_to_node_id(nal_data->gm_port,
+                                           srxd->gm_source_node,
                                            &source_node) != GM_SUCCESS) {
 
-                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+                       CERROR("cannot resolve global_id [%u] "
                               "to local node_id\n", srxd->gm_source_node);
                        GMNAL_GM_UNLOCK(nal_data);
                        return(GMNAL_STATUS_FAIL);
@@ -1013,7 +1007,7 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                 *      Set pointer in stxd to srxd so callback count in srxd
                 *      can be decremented to find last callback to complete
                 */
-               CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n", 
+               CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n",
                       srxd->gm_source_node, source_node);
        }
 
@@ -1124,8 +1118,7 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
        CDEBUG(D_TRACE, "called for context [%p]\n", context);
 
        if (status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "reports error [%d][%s]\n", status, 
-                      gmnal_gm_error(status));
+               CERROR("reports error [%d/%s]\n",status,gmnal_gm_error(status));
        }
 
        spin_lock(&srxd->callback_lock);
@@ -1144,11 +1137,11 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
                CDEBUG(D_ERROR, "NOT final callback context[%p]\n", srxd);
                return;
        }
-       
+
        /*
         *      Let our client application proceed
-        */     
-       CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
+        */
+       CERROR("final callback context[%p]\n", srxd);
        lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
 
        /*
@@ -1164,10 +1157,10 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
        riov = srxd->riov;
        GMNAL_GM_LOCK(nal_data);
        while (nriov--) {
-               CDEBUG(D_ERROR, "deregister memory [%p]\n", riov->iov_base);
-               if (gm_deregister_memory(srxd->nal_data->gm_port, 
-                                        riov->iov_base, riov->iov_len)) {
-                       CDEBUG(D_ERROR, "failed to deregister memory [%p]\n", 
+               CERROR("deregister memory [%p]\n", riov->iov_base);
+               if (gm_deregister_memory(srxd->nal_data->gm_port,
+                                        riov->iov_base, riov->iov_len)) {
+                       CERROR("failed to deregister memory [%p]\n",
                               riov->iov_base);
                }
                riov++;
@@ -1202,7 +1195,7 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd, 
+       CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd,
               srxd->gm_source_node);
 
        GMNAL_GM_LOCK(nal_data);
@@ -1210,7 +1203,7 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
                                            srxd->gm_source_node, &local_nid);
        GMNAL_GM_UNLOCK(nal_data);
        if (gm_status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "Failed to obtain local id\n");
+               CERROR("Failed to obtain local id\n");
                return;
        }
        CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
@@ -1244,20 +1237,20 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 
        CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
               "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
-              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
               stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
        GMNAL_GM_LOCK(nal_data);
        stxd->gm_priority = GM_LOW_PRIORITY;
        stxd->gm_target_node = local_nid;
-       gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, 
-                                     stxd->gm_size, stxd->msg_size, 
-                                     GM_LOW_PRIORITY, local_nid, 
-                                     gmnal_large_tx_ack_callback, 
+       gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer,
+                                     stxd->gm_size, stxd->msg_size,
+                                     GM_LOW_PRIORITY, local_nid,
+                                     gmnal_large_tx_ack_callback,
                                      (void*)stxd);
-       
+
        GMNAL_GM_UNLOCK(nal_data);
        CDEBUG(D_INFO, "gmnal_large_tx_ack :: done\n");
-               
+
        return;
 }
 
@@ -1265,19 +1258,19 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 /*
  *     A callback to indicate the small transmit operation is complete
  *     Check for errors and try to deal with them.
- *     Call lib_finalise to inform the client application that the 
+ *     Call lib_finalise to inform the client application that the
  *     send is complete and the memory can be reused.
  *     Return the stxd when finished with it (returns a send token)
  */
-void 
-gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context, 
+void
+gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context,
                             gm_status_t status)
 {
        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
        gmnal_data_t    *nal_data = (gmnal_data_t*)stxd->nal_data;
 
        if (!stxd) {
-               CDEBUG(D_ERROR, "send completion event for unknown stxd\n");
+               CERROR("send completion event for unknown stxd\n");
                return;
        }
        CDEBUG(D_TRACE, "send completion event for stxd [%p] status is [%d]\n",
index 3851649..3dd09b3 100644 (file)
@@ -42,7 +42,7 @@ gmnal_cmd(struct portals_cfg *pcfg, void *private)
        gm_status_t     gm_status;
 
 
-       CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n", 
+       CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n",
               pcfg->pcfg_command, private);
        nal_data = (gmnal_data_t*)private;
        switch(pcfg->pcfg_command) {
@@ -53,23 +53,24 @@ gmnal_cmd(struct portals_cfg *pcfg, void *private)
 
                PORTAL_ALLOC(name, pcfg->pcfg_plen1);
                copy_from_user(name, PCFG_PBUF(pcfg, 1), pcfg->pcfg_plen1);
-       
+
                GMNAL_GM_LOCK(nal_data);
                //nid = gm_host_name_to_node_id(nal_data->gm_port, name);
-                gm_status = gm_host_name_to_node_id_ex (nal_data->gm_port, 0, name, &nid);
+                gm_status = gm_host_name_to_node_id_ex(nal_data->gm_port, 0,
+                                                       name, &nid);
                GMNAL_GM_UNLOCK(nal_data);
                 if (gm_status != GM_SUCCESS) {
-                        CDEBUG(D_INFO, "gm_host_name_to_node_id_ex(...host %s) failed[%d]\n",
-                                name, gm_status);
+                        CDEBUG(D_INFO, "gm_host_name_to_node_id_ex(...host %s) "
+                               "failed[%d]\n", name, gm_status);
                         return (-1);
                 } else
                        CDEBUG(D_INFO, "Local node %s id is [%d]\n", name, nid);
                GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_node_id_to_global_id(nal_data->gm_port, 
+               gm_status = gm_node_id_to_global_id(nal_data->gm_port,
                                                    nid, &gnid);
                GMNAL_GM_UNLOCK(nal_data);
                if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n", 
+                       CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n",
                               gm_status);
                        return(-1);
                }
@@ -100,12 +101,10 @@ gmnal_load(void)
        } else {
                CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n");
                return(-ENODEV);
-               
        }
 
        CDEBUG(D_INFO, "This is the end of the gmnal init routine");
 
-
        return(0);
 }
 
index 508a48c..a725088 100644 (file)
@@ -68,10 +68,10 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
        ntx = gm_num_send_tokens(nal_data->gm_port);
        GMNAL_GM_UNLOCK(nal_data);
        CDEBUG(D_INFO, "total number of send tokens available is [%d]\n", ntx);
-       
+
        /*
-        *      allocate a number for small sends
-        *      num_stxds from gmnal_module.c
+        *      allocate a number for small sends
+        *      num_stxds from gmnal_module.c
         */
        nstx = num_stxds;
        /*
@@ -84,19 +84,19 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
         */
        nltx = ntx - (nrxt_stx + nstx);
        if (nltx < 1) {
-               CDEBUG(D_ERROR, "No tokens available for large messages\n");
+               CERROR("No tokens available for large messages\n");
                return(GMNAL_STATUS_FAIL);
        }
 
 
        /*
-        * A semaphore is initialised with the 
+        * A semaphore is initialised with the
         * number of transmit tokens available.
         * To get a stxd, acquire the token semaphore.
-        * this decrements the available token count
-        * (if no tokens you block here, someone returning a 
+        * this decrements the available token count
+        * (if no tokens you block here, someone returning a
         * stxd will release the semaphore and wake you)
-        * When token is obtained acquire the spinlock 
+        * When token is obtained acquire the spinlock
         * to manipulate the list
         */
        GMNAL_TXD_TOKEN_INIT(nal_data, nstx);
@@ -105,21 +105,20 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
        GMNAL_RXT_TXD_LOCK_INIT(nal_data);
        GMNAL_LTXD_TOKEN_INIT(nal_data, nltx);
        GMNAL_LTXD_LOCK_INIT(nal_data);
-       
+
        for (i=0; i<=nstx; i++) {
                PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
                if (!txd) {
-                       CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
+                       CERROR("Failed to malloc txd [%d]\n", i);
                        return(GMNAL_STATUS_NOMEM);
                }
                GMNAL_GM_LOCK(nal_data);
-               txbuffer = gm_dma_malloc(nal_data->gm_port, 
+               txbuffer = gm_dma_malloc(nal_data->gm_port,
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
-                              " size [%d]\n", i, 
-                              GMNAL_SMALL_MSG_SIZE(nal_data));
+                       CERROR("Failed to gm_dma_malloc txbuffer [%d], "
+                              "size [%d]\n", i,GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
@@ -138,7 +137,7 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
        for (i=0; i<=nrxt_stx; i++) {
                PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
                if (!txd) {
-                       CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
+                       CERROR("Failed to malloc txd [%d]\n", i);
                        return(GMNAL_STATUS_NOMEM);
                }
                GMNAL_GM_LOCK(nal_data);
@@ -146,9 +145,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
-                              " size [%d]\n", i, 
-                              GMNAL_SMALL_MSG_SIZE(nal_data));
+                       CERROR("Failed to gm_dma_malloc txbuffer [%d],"
+                              " size [%d]\n",i,GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
@@ -252,7 +250,7 @@ gmnal_get_stxd(gmnal_data_t *nal_data, int block)
                         CDEBUG(D_PORTALS, "Got token\n");
                } else {
                        if (GMNAL_TXD_TRYGETTOKEN(nal_data)) {
-                               CDEBUG(D_ERROR, "can't get token\n");
+                               CERROR("can't get token\n");
                                return(NULL);
                        }
                }
@@ -260,7 +258,7 @@ gmnal_get_stxd(gmnal_data_t *nal_data, int block)
                txd = nal_data->stxd;
                nal_data->stxd = txd->next;
                GMNAL_TXD_UNLOCK(nal_data);
-               CDEBUG(D_INFO, "got [%p], head is [%p]\n", txd, 
+               CDEBUG(D_INFO, "got [%p], head is [%p]\n", txd,
                       nal_data->stxd);
                 txd->kniov = 0;
         }       /* general txd get */
@@ -273,7 +271,7 @@ gmnal_get_stxd(gmnal_data_t *nal_data, int block)
 void
 gmnal_return_stxd(gmnal_data_t *nal_data, gmnal_stxd_t *txd)
 {
-       CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data, 
+       CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data,
               txd, txd->rxt);
 
         /*
@@ -356,9 +354,9 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
        GMNAL_GM_LOCK(nal_data);
        nrx = gm_num_receive_tokens(nal_data->gm_port);
        GMNAL_GM_UNLOCK(nal_data);
-       CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n", 
+       CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n",
               nrx);
-       
+
        nsrx = nrx/2;
        nsrx = 12;
        /*
@@ -367,7 +365,7 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
         */
        nsrx = num_stxds*2 + 2;
 
-       CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n", 
+       CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n",
               nsrx);
 
 
@@ -376,7 +374,7 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                                             gm_hash_hash_ptr, 0, 0, nsrx, 0);
        GMNAL_GM_UNLOCK(nal_data);
        if (!nal_data->srxd_hash) {
-                       CDEBUG(D_ERROR, "Failed to create hash table\n");
+                       CERROR("Failed to create hash table\n");
                        return(GMNAL_STATUS_NOMEM);
        }
 
@@ -386,43 +384,40 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
        for (i=0; i<=nsrx; i++) {
                PORTAL_ALLOC(rxd, sizeof(gmnal_srxd_t));
                if (!rxd) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxd [%d]\n", i);
+                       CERROR("Failed to malloc rxd [%d]\n", i);
                        return(GMNAL_STATUS_NOMEM);
                }
 #if 0
                PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
-                              "size [%d]\n", i, 
-                              GMNAL_SMALL_MSG_SIZE(nal_data));
+                       CERROR("Failed to malloc rxbuffer [%d], "
+                              "size [%d]\n", i,GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
                CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
-                      "rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
+                      "rxbuffer [%p], size [%d]\n", nal_data->gm_port,
                       rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, 
+               gm_status = gm_register_memory(nal_data->gm_port, rxbuffer,
                                               GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],"
+                       CERROR("gm_register_memory failed buffer [%p],"
                               " index [%d]\n", rxbuffer, i);
                        switch(gm_status) {
                                case(GM_FAILURE):
-                                       CDEBUG(D_ERROR, "GM_FAILURE\n");
+                                       CERROR("GM_FAILURE\n");
                                break;
                                case(GM_PERMISSION_DENIED):
-                                       CDEBUG(D_ERROR, "PERMISSION_DENIED\n");
+                                       CERROR("PERMISSION_DENIED\n");
                                break;
                                case(GM_INVALID_PARAMETER):
-                                       CDEBUG(D_ERROR, "INVALID_PARAMETER\n");
+                                       CERROR("INVALID_PARAMETER\n");
                                break;
                                default:
-                                       CDEBUG(D_ERROR, "Unknown error[%d]\n", 
-                                              gm_status);
+                                       CERROR("Unknown error[%d]\n",gm_status);
                                break;
-                               
                        }
                        return(GMNAL_STATUS_FAIL);
                }
@@ -432,22 +427,21 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
-                              " size [%d]\n", i, 
-                              GMNAL_SMALL_MSG_SIZE(nal_data));
+                       CERROR("Failed to gm_dma_malloc rxbuffer [%d], "
+                              "size [%d]\n",i ,GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
 #endif
-               
+
                rxd->buffer = rxbuffer;
                rxd->size = GMNAL_SMALL_MSG_SIZE(nal_data);
                rxd->gmsize = gm_min_size_for_length(rxd->size);
 
-               if (gm_hash_insert(nal_data->srxd_hash, 
+               if (gm_hash_insert(nal_data->srxd_hash,
                                   (void*)rxbuffer, (void*)rxd)) {
 
-                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+                       CERROR("failed to create hash entry rxd[%p] "
                               "for rxbuffer[%p]\n", rxd, rxbuffer);
                        return(GMNAL_STATUS_FAIL);
                }
@@ -584,7 +578,7 @@ gmnal_stop_rxthread(gmnal_data_t *nal_data)
        }
 
        if (nal_data->rxthread_flag != GMNAL_THREAD_RESET) {
-               CDEBUG(D_ERROR, "I don't know how to wake the thread\n");
+               CERROR("I don't know how to wake the thread\n");
        } else {
                CDEBUG(D_INFO, "rx thread seems to have stopped\n");
        }
@@ -612,7 +606,7 @@ gmnal_stop_ctthread(gmnal_data_t *nal_data)
        }
 
        if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) {
-               CDEBUG(D_ERROR, "I DON'T KNOW HOW TO WAKE THE THREAD\n");
+               CERROR("I DON'T KNOW HOW TO WAKE THE THREAD\n");
        } else {
                CDEBUG(D_INFO, "CT THREAD SEEMS TO HAVE STOPPED\n");
        }
@@ -889,7 +883,7 @@ gmnal_is_small_msg(gmnal_data_t *nal_data, int niov, struct iovec *iov,
                CDEBUG(D_INFO, "Yep, small message\n");
                return(1);
        } else {
-               CDEBUG(D_ERROR, "No, not small message\n");
+               CERROR("No, not small message\n");
                /*
                 *      could be made up of lots of little ones !
                 */
@@ -914,7 +908,7 @@ gmnal_add_rxtwe(gmnal_data_t *nal_data, gm_recv_t *recv)
 
        PORTAL_ALLOC(we, sizeof(gmnal_rxtwe_t));
        if (!we) {
-               CDEBUG(D_ERROR, "failed to malloc\n");
+               CERROR("failed to malloc\n");
                return(GMNAL_STATUS_FAIL);
        }
        we->buffer = gm_ntohp(recv->buffer);
@@ -981,7 +975,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                        if (!nal_data->rxtwe_head)
                                nal_data->rxtwe_tail = NULL;
                } else {
-                       CDEBUG(D_WARNING, "woken but no work\n");
+                       CWARN("woken but no work\n");
                }
                spin_unlock(&nal_data->rxtwe_lock);
        } while (!we);
@@ -1016,7 +1010,7 @@ gmnal_start_kernel_threads(gmnal_data_t *nal_data)
        nal_data->ctthread_pid = 
                 kernel_thread(gmnal_ct_thread, (void*)nal_data, 0);
        if (nal_data->ctthread_pid <= 0) {
-               CDEBUG(D_ERROR, "Caretaker thread failed to start\n");
+               CERROR("Caretaker thread failed to start\n");
                return(GMNAL_STATUS_FAIL);
        }
 
@@ -1053,7 +1047,7 @@ gmnal_start_kernel_threads(gmnal_data_t *nal_data)
                nal_data->rxthread_pid[threads] = 
                       kernel_thread(gmnal_rx_thread, (void*)nal_data, 0);
                if (nal_data->rxthread_pid[threads] <= 0) {
-                       CDEBUG(D_ERROR, "Receive thread failed to start\n");
+                       CERROR("Receive thread failed to start\n");
                        gmnal_stop_rxthread(nal_data);
                        gmnal_stop_ctthread(nal_data);
                        return(GMNAL_STATUS_FAIL);
index b9ca677..eb9e6fa 100644 (file)
@@ -1486,7 +1486,7 @@ init_tx:
         } else {
                 LASSERT (tx->tx_nsp == 1);
                 /* No RDMA: local completion happens now! */
-                CDEBUG(D_WARNING,"No data: immediate completion\n");
+                CWARN("No data: immediate completion\n");
                 lib_finalize (&kibnal_lib, NULL, libmsg,
                               status == 0 ? PTL_OK : PTL_FAIL);
         }
@@ -2449,7 +2449,7 @@ kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
                 goto out;
         }
 
-        CDEBUG(D_WARNING, "Connection %p -> "LPX64" ESTABLISHED.\n",
+        CWARN("Connection %p -> "LPX64" ESTABLISHED.\n",
                conn, conn->ibc_peer->ibp_nid);
 
 out:
index 480c5aa..3862c5b 100644 (file)
@@ -800,7 +800,7 @@ kibnal_stop_ip_listener(int clear_acceptq)
         down(&kibnal_data.kib_listener_signal);
 
         LASSERT (kibnal_data.kib_listener_sock == NULL);
-        CDEBUG(D_WARNING, "Listener stopped\n");
+        CWARN("Listener stopped\n");
 
         if (!clear_acceptq)
                 return;
index dee5bd9..a356eaf 100644 (file)
@@ -1832,7 +1832,7 @@ kibnal_conn_callback (tTS_IB_CM_EVENT event,
                 break;
                 
         case TS_IB_CM_DISCONNECTED:
-                CDEBUG(D_WARNING, "Connection %p -> "LPX64" DISCONNECTED.\n",
+                CWARN("Connection %p -> "LPX64" DISCONNECTED.\n",
                        conn, conn->ibc_peer->ibp_nid);
                 kibnal_close_conn (conn, 0);
                 break;
@@ -1968,7 +1968,7 @@ kibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
 
         case TS_IB_CM_ESTABLISHED:
                 LASSERT (conn != NULL);
-                CDEBUG(D_WARNING, "Connection %p -> "LPX64" ESTABLISHED.\n",
+                CWARN("Connection %p -> "LPX64" ESTABLISHED.\n",
                        conn, conn->ibc_peer->ibp_nid);
 
                 kibnal_connreq_done(conn, 0);
@@ -2037,7 +2037,7 @@ kibnal_active_conn_callback (tTS_IB_CM_EVENT event,
         }
 
         case TS_IB_CM_ESTABLISHED:
-                CDEBUG(D_WARNING, "Connection %p -> "LPX64" ESTABLISHED\n",
+                CWARN("Connection %p -> "LPX64" ESTABLISHED\n",
                        conn, conn->ibc_peer->ibp_nid);
 
                 kibnal_connreq_done(conn, 0);
index b7ae218..f984e6f 100644 (file)
@@ -920,7 +920,7 @@ kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
         if (nstale != 0)
                 CWARN("Closed %d stale conns to "LPX64"\n", nstale, peer_nid);
 
-        CDEBUG(D_WARNING, "New connection to "LPX64" on devid[%d] = %d\n",
+        CWARN("New connection to "LPX64" on devid[%d] = %d\n",
                peer_nid, conn->rac_device->rad_idx, conn->rac_device->rad_id);
 
         /* Ensure conn gets checked.  Transmits may have been queued and an
index b4184b5..dd910ce 100644 (file)
@@ -1438,7 +1438,7 @@ kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg,
         case RAP_NOT_DONE:
                 if (time_after_eq(jiffies,
                                   conn->rac_last_tx + conn->rac_keepalive*HZ))
-                        CDEBUG(D_WARNING, "EAGAIN sending %02x (idle %lu secs)\n",
+                        CWARN("EAGAIN sending %02x (idle %lu secs)\n",
                                msg->ram_type, (jiffies - conn->rac_last_tx)/HZ);
                 return -EAGAIN;
         }
@@ -1901,8 +1901,8 @@ kranal_complete_closed_conn (kra_conn_t *conn)
                 kranal_tx_done(tx, -ECONNABORTED);
         }
 
-        CDEBUG(D_WARNING, "Closed conn %p -> "LPX64": nmsg %d nreplies %d\n",
-               conn, conn->rac_peer->rap_nid, nfma, nreplies);
+        CWARN("Closed conn %p -> "LPX64": nmsg %d nreplies %d\n",
+              conn, conn->rac_peer->rap_nid, nfma, nreplies);
 }
 
 int
index 448871e..295ec35 100644 (file)
@@ -1132,9 +1132,10 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
 
         rc = ksocknal_close_stale_conns_locked(peer, incarnation);
         if (rc != 0)
-                CERROR ("Closed %d stale conns to nid "LPX64" ip %d.%d.%d.%d\n",
-                        rc, conn->ksnc_peer->ksnp_nid,
-                        HIPQUAD(conn->ksnc_ipaddr));
+                CDEBUG(D_HA,
+                       "Closed %d stale conns to nid "LPX64" ip %d.%d.%d.%d\n",
+                       rc, conn->ksnc_peer->ksnp_nid,
+                       HIPQUAD(conn->ksnc_ipaddr));
 
         write_unlock_irqrestore (global_lock, flags);
 
@@ -1146,11 +1147,11 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
                 ksocknal_putconnsock(conn);
         }
 
-        CWARN("New conn nid:"LPX64" %u.%u.%u.%u -> %u.%u.%u.%u/%d"
-              " incarnation:"LPX64" sched[%d]/%d\n",
-              nid, HIPQUAD(conn->ksnc_myipaddr),
-              HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
-              (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
+        CDEBUG(D_HA, "New conn nid:"LPX64" %u.%u.%u.%u -> %u.%u.%u.%u/%d "
+               "incarnation:"LPX64" sched[%d]/%d\n",
+               nid, HIPQUAD(conn->ksnc_myipaddr),
+               HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
+               (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers),irq);
 
         ksocknal_put_conn (conn);
         return (0);
index ef8ca0f..bd26027 100644 (file)
@@ -102,7 +102,7 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 
         if (rc <= 0)                            /* sent nothing? */ 
                 return (rc); 
-        
+
         nob = rc; 
         LASSERT (nob <= tx->tx_resid); 
         tx->tx_resid -= nob; 
@@ -130,7 +130,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         int      rc;
         int      bufnob;
-        
+
         if (ksocknal_data.ksnd_stall_tx != 0) {
                 set_current_state (TASK_UNINTERRUPTIBLE);
                 schedule_timeout (cfs_time_seconds(ksocknal_data.ksnd_stall_tx));
@@ -158,7 +158,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
                 bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock);
                 if (rc > 0)                     /* sent something? */
                         conn->ksnc_tx_bufnob += rc; /* account it */
-                
+
                 if (bufnob < conn->ksnc_tx_bufnob) {
                         /* allocated send buffer bytes < computed; infer
                          * something got ACKed */
@@ -182,7 +182,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 
                         sched = conn->ksnc_scheduler;
                         spin_lock_irqsave(&sched->kss_lock, flags);
-                                
+
                         if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
                             !conn->ksnc_tx_ready) {
                                 /* SOCK_NOSPACE is set when the socket fills
@@ -228,12 +228,12 @@ ksocknal_recv_iov (ksock_conn_t *conn)
 
         /* received something... */ 
         nob = rc; 
-        
+
         conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); 
         conn->ksnc_rx_deadline = cfs_time_shift (ksocknal_tunables.ksnd_io_timeout); 
         mb();                           /* order with setting rx_started */ 
         conn->ksnc_rx_started = 1; 
-        
+
         conn->ksnc_rx_nob_wanted -= nob; 
         conn->ksnc_rx_nob_left -= nob;
 
@@ -265,10 +265,10 @@ ksocknal_recv_kiov (ksock_conn_t *conn)
         /* Never touch conn->ksnc_rx_kiov or change connection 
          * status inside ksocknal_lib_recv_iov */
         rc = ksocknal_lib_recv_kiov(conn); 
-        
+
         if (rc <= 0) 
                 return (rc); 
-        
+
         /* received something... */ 
         nob = rc; 
 
@@ -279,7 +279,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn)
 
         conn->ksnc_rx_nob_wanted -= nob; 
         conn->ksnc_rx_nob_left -= nob; 
-        
+
         do { 
                 LASSERT (conn->ksnc_rx_nkiov > 0); 
 
@@ -305,7 +305,7 @@ ksocknal_receive (ksock_conn_t *conn)
          * progress/completion. */
         int     rc;
         ENTRY;
-        
+
         if (ksocknal_data.ksnd_stall_rx != 0) {
                 set_current_state (TASK_UNINTERRUPTIBLE);
                 schedule_timeout(cfs_time_seconds (ksocknal_data.ksnd_stall_rx));
@@ -416,7 +416,7 @@ ksocknal_tx_launched (ksock_tx_t *tx)
 #if SOCKNAL_ZC
         if (atomic_read (&tx->tx_zccd.zccd_count) != 1) {
                 ksock_conn_t  *conn = tx->tx_conn;
-                
+
                 /* zccd skbufs are still in-flight.  First take a ref on
                  * conn, so it hangs about for ksocknal_tx_done... */
                 atomic_inc (&conn->ksnc_refcount);
@@ -437,7 +437,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         unsigned long  flags;
         int            rc;
-       
+
         rc = ksocknal_transmit (conn, tx);
 
         CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
@@ -472,7 +472,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
                                                    SOCKNAL_ENOMEM_RETRY),
                                    ksocknal_data.ksnd_reaper_waketime))
                         cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
-                
+
                 spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags);
                 return (rc);
         }
@@ -494,7 +494,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
                                       HIPQUAD(conn->ksnc_ipaddr), rc);
                         break;
                 }
-                CERROR("[%p] Error %d on write to "LPX64
+                CDEBUG(D_HA, "[%p] Error %d on write to "LPX64
                        " ip %d.%d.%d.%d:%d\n", conn, rc,
                        conn->ksnc_peer->ksnp_nid,
                        HIPQUAD(conn->ksnc_ipaddr),
@@ -514,16 +514,16 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route)
 
         /* called holding write lock on ksnd_global_lock */
         LASSERT (!route->ksnr_connecting);
-        
+
         route->ksnr_connecting = 1;             /* scheduling conn for autoconnectd */
         atomic_inc (&route->ksnr_refcount);     /* extra ref for autoconnectd */
-        
+
         spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
-        
+
         list_add_tail (&route->ksnr_connect_list,
                        &ksocknal_data.ksnd_autoconnectd_routes);
         cfs_waitq_signal (&ksocknal_data.ksnd_autoconnectd_waitq);
-        
+
         spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
 }
 
@@ -639,7 +639,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
          * ksnc_sock... */
         LASSERT(!conn->ksnc_closing);
         LASSERT(tx->tx_resid == tx->tx_nob);
-        
+
         CDEBUG (D_NET, "Sending to "LPX64" ip %d.%d.%d.%d:%d\n", 
                 conn->ksnc_peer->ksnp_nid,
                 HIPQUAD(conn->ksnc_ipaddr),
@@ -664,7 +664,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         }
 
         list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
-                
+
         if (conn->ksnc_tx_ready &&      /* able to send */
             !conn->ksnc_tx_scheduled) { /* not scheduled to send */
                 /* +1 ref for scheduler */
@@ -684,7 +684,7 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
         struct list_head  *tmp;
         ksock_route_t     *route;
         int                bits;
-        
+
         list_for_each (tmp, &peer->ksnp_routes) {
                 route = list_entry (tmp, ksock_route_t, ksnr_list);
                 bits  = route->ksnr_connected;
@@ -698,7 +698,7 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
                         if ((bits & (1 << SOCKNAL_CONN_ANY)) != 0)
                                 continue;
                 }
-                
+
                 /* connection being established? */
                 if (route->ksnr_connecting)
                         continue;
@@ -706,10 +706,10 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
                 /* too soon to retry this guy? */
                 if (!cfs_time_aftereq (cfs_time_current(), route->ksnr_timeout))
                         continue;
-                
+
                 return (route);
         }
-        
+
         return (NULL);
 }
 
@@ -721,11 +721,11 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
 
         list_for_each (tmp, &peer->ksnp_routes) {
                 route = list_entry (tmp, ksock_route_t, ksnr_list);
-                
+
                 if (route->ksnr_connecting)
                         return (route);
         }
-        
+
         return (NULL);
 }
 
@@ -737,7 +737,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
         ksock_conn_t     *conn;
         ksock_route_t    *route;
         rwlock_t         *g_lock;
-        
+
         /* Ensure the frags we've been given EXACTLY match the number of
          * bytes we want to send.  Many TCP/IP stacks disregard any total
          * size parameters passed to them and just look at the frags. 
@@ -777,7 +777,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
                         return (0);
                 }
         }
+
         /* I'll need a write lock... */
         read_unlock (g_lock);
 #endif
@@ -814,7 +814,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
                 write_unlock_irqrestore (g_lock, flags);
                 return (0);
         }
-        
+
         write_unlock_irqrestore (g_lock, flags);
         return (-EHOSTUNREACH);
 }
@@ -850,12 +850,12 @@ ksocknal_sendmsg(lib_nal_t     *nal,
         LASSERT (payload_kiov == NULL || !in_interrupt ());
         /* payload is either all vaddrs or all pages */
         LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-        
+
         if (payload_iov != NULL)
                 desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
         else
                 desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
-        
+
         if (in_interrupt() ||
             type == PTL_MSG_ACK ||
             type == PTL_MSG_REPLY) {
@@ -865,7 +865,7 @@ ksocknal_sendmsg(lib_nal_t     *nal,
         } else {
                 PORTAL_ALLOC(ltx, desc_size);
         }
-        
+
         if (ltx == NULL) {
                 CERROR("Can't allocate tx desc type %d size %d %s\n",
                        type, desc_size, in_interrupt() ? "(intr)" : "");
@@ -875,16 +875,16 @@ ksocknal_sendmsg(lib_nal_t     *nal,
         atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
 
         ltx->ltx_desc_size = desc_size;
-        
+
         /* We always have 1 mapped frag for the header */
         ltx->ltx_tx.tx_iov = ltx->ltx_iov;
         ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
         ltx->ltx_iov[0].iov_len = sizeof(*hdr);
         ltx->ltx_hdr = *hdr;
-        
+
         ltx->ltx_private = private;
         ltx->ltx_cookie = cookie;
-        
+
         ltx->ltx_tx.tx_isfwd = 0;
         ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
 
@@ -911,7 +911,7 @@ ksocknal_sendmsg(lib_nal_t     *nal,
         rc = ksocknal_launch_packet(&ltx->ltx_tx, nid);
         if (rc == 0)
                 return (PTL_OK);
-        
+
         ksocknal_free_ltx(ltx);
         return (PTL_FAIL);
 }
@@ -946,7 +946,7 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
         ptl_nid_t     nid = fwd->kprfd_gateway_nid;
         ksock_ftx_t  *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
         int           rc;
-        
+
         CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
                 fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
 
@@ -1144,7 +1144,7 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
 
         conn->ksnc_cookie = fmb;                /* stash fmb for later */
         conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
-        
+
         /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
          * buffer */
         LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
@@ -1153,7 +1153,7 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
         conn->ksnc_rx_nkiov = niov;
         conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
         memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
-        
+
         CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
                 le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
         return (0);
@@ -1238,7 +1238,7 @@ ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
         if (nob_to_skip == 0) {         /* right at next packet boundary now */
                 conn->ksnc_rx_started = 0;
                 mb ();                          /* racing with timeout thread */
-                
+
                 conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
                 conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
                 conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
@@ -1286,7 +1286,7 @@ ksocknal_process_receive (ksock_conn_t *conn)
 {
         ksock_fmb_t  *fmb;
         int           rc;
-        
+
         LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
 
         /* doesn't need a forwarding buffer */
@@ -1339,7 +1339,7 @@ ksocknal_process_receive (ksock_conn_t *conn)
                 /* short read */
                 return (-EAGAIN);
         }
-        
+
         switch (conn->ksnc_rx_state) {
         case SOCKNAL_RX_HEADER:
                 if (conn->ksnc_hdr.type != cpu_to_le32(PTL_MSG_HELLO) &&
@@ -1422,7 +1422,7 @@ ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
 
         LASSERT (mlen <= rlen);
         LASSERT (niov <= PTL_MD_MAX_IOV);
-        
+
         conn->ksnc_cookie = msg;
         conn->ksnc_rx_nob_wanted = mlen;
         conn->ksnc_rx_nob_left   = rlen;
@@ -1450,7 +1450,7 @@ ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
 
         LASSERT (mlen <= rlen);
         LASSERT (niov <= PTL_MD_MAX_IOV);
-        
+
         conn->ksnc_cookie = msg;
         conn->ksnc_rx_nob_wanted = mlen;
         conn->ksnc_rx_nob_left   = rlen;
@@ -1483,7 +1483,7 @@ ksocknal_sched_cansleep(ksock_sched_t *sched)
 #endif
               list_empty(&sched->kss_rx_conns) &&
               list_empty(&sched->kss_tx_conns));
-        
+
         spin_unlock_irqrestore(&sched->kss_lock, flags);
         return (rc);
 }
@@ -1571,16 +1571,16 @@ int ksocknal_scheduler (void *arg)
                         conn = list_entry(sched->kss_tx_conns.next,
                                           ksock_conn_t, ksnc_tx_list);
                         list_del (&conn->ksnc_tx_list);
-                        
+
                         LASSERT(conn->ksnc_tx_scheduled);
                         LASSERT(conn->ksnc_tx_ready);
                         LASSERT(!list_empty(&conn->ksnc_tx_queue));
-                        
+
                         tx = list_entry(conn->ksnc_tx_queue.next,
                                         ksock_tx_t, tx_list);
                         /* dequeue now so empty list => more to send */
                         list_del(&tx->tx_list);
-                        
+
                         /* Clear tx_ready in case send isn't complete.  Do
                          * it BEFORE we call process_transmit, since
                          * write_space can set it any time after we release
@@ -1613,7 +1613,7 @@ int ksocknal_scheduler (void *arg)
                                 /* drop my ref */
                                 ksocknal_put_conn (conn);
                         }
-                                
+
                         did_something = 1;
                 }
 #if SOCKNAL_ZC
@@ -1692,7 +1692,7 @@ void ksocknal_write_callback (ksock_conn_t *conn)
         ksock_sched_t *sched; 
         unsigned long  flags;
         ENTRY;
-        
+
         sched = conn->ksnc_scheduler; 
 
         spin_lock_irqsave (&sched->kss_lock, flags); 
@@ -1763,10 +1763,10 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
                         rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
                 return (rc);
         }
-        
+
         if (nipaddrs == 0)
                 return (0);
-        
+
         for (i = 0; i < nipaddrs; i++) {
                 ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]);
         }
@@ -1879,11 +1879,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
                                portals_nid2str(SOCKNAL,
                                                le64_to_cpu(hdr.src_nid),
                                                ipbuf));
-                               
-                CERROR ("Connected to nid "LPX64"@%u.%u.%u.%u "
-                        "but expecting "LPX64"\n",
-                        le64_to_cpu (hdr.src_nid),
-                        HIPQUAD(conn->ksnc_ipaddr), *nid);
+
                 return (-EPROTO);
         }
 
@@ -1917,7 +1913,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
 
         if (nips == 0)
                 return (0);
-        
+
         rc = ksocknal_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs));
         if (rc != 0) {
                 CERROR ("Error %d reading IPs from "LPX64"@%u.%u.%u.%u\n",
@@ -1927,7 +1923,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
 
         for (i = 0; i < nips; i++) {
                 ipaddrs[i] = __le32_to_cpu(ipaddrs[i]);
-                
+
                 if (ipaddrs[i] == 0) {
                         CERROR("Zero IP[%d] from "LPX64"@%u.%u.%u.%u\n",
                                i, *nid, HIPQUAD(conn->ksnc_ipaddr));
@@ -1945,7 +1941,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type)
         int                 rc;
         int                 port;
         int                 may_retry;
-        
+
         /* Iterate through reserved ports.  When typed connections are
          * used, we will need to bind to multiple ports, but we only know
          * this at connect time.  But, by that time we've already called
@@ -2191,16 +2187,16 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
                                 break;
                         default:
                                 LCONSOLE_WARN("An unexpected network error "
-                                              "occurred with %u.%u.%u.%u: %d.\n",
+                                              "occurred with %u.%u.%u.%u: %d\n",
                                               HIPQUAD(conn->ksnc_ipaddr),
                                               SOCK_ERROR(conn->ksnc_sock));
                                 break;
                         }
 
                         /* Something (e.g. failed keepalive) set the socket error */
-                        CERROR ("Socket error %d: "LPX64" %p %d.%d.%d.%d\n",
-                                SOCK_ERROR(conn->ksnc_sock), peer->ksnp_nid,
-                                conn, HIPQUAD(conn->ksnc_ipaddr));
+                        CDEBUG(D_HA,"Socket error %d: "LPX64" %p %d.%d.%d.%d\n",
+                               SOCK_ERROR(conn->ksnc_sock), peer->ksnp_nid,
+                               conn, HIPQUAD(conn->ksnc_ipaddr));
 
                         return (conn);
                 }
@@ -2259,16 +2255,15 @@ ksocknal_check_peer_timeouts (int idx)
         list_for_each (ptmp, peers) {
                 peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
                 conn = ksocknal_find_timed_out_conn (peer);
-                
+
                 if (conn != NULL) {
                         read_unlock (&ksocknal_data.ksnd_global_lock);
 
-                        CERROR ("Timeout out conn->"LPX64" ip %d.%d.%d.%d:%d\n",
-                                peer->ksnp_nid,
-                                HIPQUAD(conn->ksnc_ipaddr),
-                                conn->ksnc_port);
+                        CERROR("Timeout out conn->"LPX64" ip %d.%d.%d.%d:%d\n",
+                               peer->ksnp_nid, HIPQUAD(conn->ksnc_ipaddr),
+                               conn->ksnc_port);
                         ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
-                        
+
                         /* NB we won't find this one again, but we can't
                          * just proceed with the next peer, since we dropped
                          * ksnd_global_lock and it might be dead already! */
@@ -2308,7 +2303,7 @@ ksocknal_reaper (void *arg)
                         conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next,
                                            ksock_conn_t, ksnc_list);
                         list_del (&conn->ksnc_list);
-                        
+
                         spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
 
                         ksocknal_terminate_conn (conn);
@@ -2322,7 +2317,7 @@ ksocknal_reaper (void *arg)
                         conn = list_entry (ksocknal_data.ksnd_zombie_conns.next,
                                            ksock_conn_t, ksnc_list);
                         list_del (&conn->ksnc_list);
-                        
+
                         spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
 
                         ksocknal_destroy_conn (conn);
index efa51aa..f2b48d5 100644 (file)
@@ -36,6 +36,7 @@ extern unsigned int portal_debug;
 extern char debug_file_path[1024];
 extern unsigned int portal_subsystem_debug;
 extern unsigned int portal_printk;
+extern unsigned int portals_catastrophe;
 extern atomic_t portal_kmemory;
 
 extern long max_debug_mb;
@@ -68,6 +69,7 @@ SYSCTL_PROC(_portals,                 OID_AUTO,       trace_daemon,
 SYSCTL_PROC(_portals,                  OID_AUTO,       debug_mb,
             CTLTYPE_INT | CTLFLAG_RW,                  &max_debug_mb,
             0,         &cfs_debug_mb,                  "L",    "max debug size");
+#warning "add 'catastrophe' entry for LBUG detection"
 
 
 static cfs_sysctl_table_t      top_table[] = {
index 3c9a99f..9f0ce91 100644 (file)
@@ -31,7 +31,7 @@
 
 #include "tracefile.h"
 
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL);
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS);
 EXPORT_SYMBOL(portal_subsystem_debug);
 
 unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
@@ -45,6 +45,9 @@ EXPORT_SYMBOL(portal_printk);
 unsigned int portal_stack;
 EXPORT_SYMBOL(portal_stack);
 
+unsigned int portals_catastrophe;
+EXPORT_SYMBOL(portals_catastrophe);
+
 #ifdef __KERNEL__
 atomic_t portal_kmemory = ATOMIC_INIT(0);
 EXPORT_SYMBOL(portal_kmemory);
index 70f4059..77277ba 100644 (file)
@@ -71,6 +71,7 @@ enum {
         PSDEV_DEBUG_DUMP_PATH,    /* crashdump tracelog location */
         PSDEV_PORTALS_UPCALL,     /* User mode upcall script  */
         PSDEV_PORTALS_MEMUSED,    /* bytes currently PORTAL_ALLOCated */
+        PSDEV_PORTALS_CATASTROPHE,/* if we have LBUGged or panic'd */
 };
 
 static struct ctl_table portals_table[] = {
@@ -86,7 +87,9 @@ static struct ctl_table portals_table[] = {
          sizeof(portals_upcall), 0644, NULL, &proc_dostring,
          &sysctl_string},
         {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter,
-         sizeof(int), 0644, NULL, &proc_dointvec},
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {PSDEV_PORTALS_CATASTROPHE, "catastrophe", &portals_catastrophe,
+         sizeof(int), 0444, NULL, &proc_dointvec},
         {0}
 };
 
index e5bb46b..a4d1804 100644 (file)
 /* should get this from autoconf somehow */
 #ifndef PIDFILE_DIR
 #define PIDFILE_DIR "/var/run"
-#endif 
+#endif
 
 #define PROGNAME "acceptor"
 
 #ifdef HAVE_LIBWRAP
 /* needed because libwrap declares these as externs */
-int     allow_severity = LOG_INFO;
-int     deny_severity = LOG_WARNING;
+int allow_severity = LOG_INFO;
+int deny_severity = LOG_WARNING;
 #endif
 
+void usage(char *myname)
+{
+        fprintf(stderr, "usage: %s [-N nal_id] [-p] [-l] port\n\n"
+                " -l\tKeep stdin/stdout open\n"
+                " -p\tAllow connections from non-privileged ports\n", myname);
+        exit (1);
+}
+
 void create_pidfile(char *name, int port)
 {
         char pidfile[1024];
         FILE *fp;
 
-        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", 
+        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
                  PIDFILE_DIR, name, port);
-        
+
         if ((fp = fopen(pidfile, "w"))) {
                 fprintf(fp, "%d\n", getpid());
                 fclose(fp);
         } else {
-                syslog(LOG_ERR, "%s: %s\n", pidfile, 
+                syslog(LOG_ERR, "%s: %s\n", pidfile,
                        strerror(errno));
         }
 }
@@ -58,43 +66,43 @@ int pidfile_exists(char *name, int port)
 {
         char pidfile[1024];
 
-        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", 
+        snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
                  PIDFILE_DIR, name, port);
-        
+
         if (!access(pidfile, F_OK)) {
-                fprintf(stderr, "%s: exists, acceptor already running.\n", 
+                fprintf(stderr, "%s: exists, acceptor already running.\n",
                         pidfile);
                 return (1);
-        } 
+        }
         return (0);
 }
 
 void
 show_connection (int fd, __u32 net_ip)
 {
-        struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET);
-        __u32 host_ip = ntohl (net_ip);
+        static long last_time;
+        static __u32 host_ip;
+        long now = time(0);
+        struct hostent *h;
         int  len;
         char host[1024];
-        
+
+        /* Don't show repeats for same host, it adds no value */
+        if (host_ip == ntohl(net_ip) && (now - last_time) < 5)
+                return;
+
+        h = gethostbyaddr((char *)&net_ip, sizeof(net_ip), AF_INET);
+        last_time = now;
+        host_ip = ntohl(net_ip);
+
         if (h == NULL)
-                snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff,
-                                    (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff);
+                snprintf(host, sizeof(host), "%d.%d.%d.%d",
+                         (host_ip >> 24) & 0xff, (host_ip >> 16) & 0xff,
+                         (host_ip >> 8)  & 0xff, host_ip & 0xff);
         else
-                snprintf (host, sizeof(host), "%s", h->h_name);
-                
-        syslog (LOG_INFO, "Accepted host: %s\n", host);
-}
+                snprintf(host, sizeof(host), "%s", h->h_name);
 
-void
-usage (char *myname)
-{
-        fprintf (stderr, 
-                 "Usage: %s [-N nal_id] [-p] [-l] port\n\n"
-                 " -l\tKeep stdin/stdout open\n"
-                 " -p\tAllow connections from non-privileged ports\n",
-                 myname);
-        exit (1);
+        syslog(LOG_INFO, "Accepted host: %s\n", host);
 }
 
 int main(int argc, char **argv)
@@ -106,7 +114,7 @@ int main(int argc, char **argv)
         int nal = SOCKNAL;
         int rport;
         int require_privports = 1;
-        
+
         while ((c = getopt (argc, argv, "N:lp")) != -1) {
                 switch (c) {
                 case 'N':
@@ -189,7 +197,7 @@ int main(int argc, char **argv)
                 struct request_info request;
 #endif
                 char addrstr[INET_ADDRSTRLEN];
-               
+
                 cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
                 if ( cfd < 0 ) {
                         perror("accept");
@@ -228,11 +236,11 @@ int main(int argc, char **argv)
                 pcfg.pcfg_nal = nal;
                 pcfg.pcfg_fd = cfd;
                 pcfg.pcfg_misc = SOCKNAL_CONN_NONE; /* == incoming connection */
-                
+
                 PORTAL_IOC_INIT(data);
                 data.ioc_pbuf1 = (char*)&pcfg;
                 data.ioc_plen1 = sizeof(pcfg);
-                
+
                 if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) {
                         perror("ioctl failed");
                 } else {