Whamcloud - gitweb
merge gmnal code in b_llp2 to HEAD
author hitao <hitao>
Mon, 1 Mar 2004 15:15:19 +0000 (15:15 +0000)
committer hitao <hitao>
Mon, 1 Mar 2004 15:15:19 +0000 (15:15 +0000)
14 files changed:
lnet/klnds/gmlnd/gmlnd.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/gmlnd/gmlnd_comm.c
lnet/klnds/gmlnd/gmlnd_module.c
lnet/klnds/gmlnd/gmlnd_utils.c
lnet/utils/gmlndnid.c
lustre/portals/knals/gmnal/gmnal.h
lustre/portals/knals/gmnal/gmnal_api.c
lustre/portals/knals/gmnal/gmnal_cb.c
lustre/portals/knals/gmnal/gmnal_comm.c
lustre/portals/knals/gmnal/gmnal_module.c
lustre/portals/knals/gmnal/gmnal_utils.c
lustre/portals/utils/gmnalnid.c

index cdde5b7..ad46b90 100644 (file)
@@ -45,6 +45,7 @@
 #include "linux/init.h"
 #include "linux/sem.h"
 #include "linux/vmalloc.h"
+#include "linux/sysctl.h"
 
 #define DEBUG_SUBSYSTEM S_GMNAL
 
 extern  int gmnal_small_msg_size;
 extern  int num_rx_threads;
 extern  int num_stxds;
+extern  int gm_port;
 #define GMNAL_SMALL_MSG_SIZE(a)                a->small_msg_size
 #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c)        gmnal_is_small_msg(n, a, b, c)
 #define GMNAL_MAGIC                            0x1234abcd
+/*
+ *     The gm_port to use for gmnal
+ */
+#define GMNAL_GM_PORT  gm_port
 
 
 /*
@@ -218,6 +224,7 @@ typedef struct _gmnal_data_t {
        gmnal_rxtwe_t   *rxtwe_tail;
        spinlock_t      rxtwe_lock;
        struct  semaphore rxtwe_wait;
+        struct ctl_table_header *sysctl;
 } gmnal_data_t;
 
 /*
@@ -234,11 +241,6 @@ typedef struct _gmnal_data_t {
 extern gmnal_data_t    *global_nal_data;
 
 /*
- *     The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT  4
-
-/*
  * for ioctl get pid
  */
 #define GMNAL_IOC_GET_GNID 1   
@@ -353,6 +355,8 @@ int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
 
 int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
 
+int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
+
 void *gmnal_cb_malloc(nal_cb_t *, size_t);
 
 void gmnal_cb_free(nal_cb_t *, void *, size_t);
@@ -382,7 +386,7 @@ void  gmnal_fini(void);
                                a->cb_recv_pages = gmnal_cb_recv_pages; \
                                a->cb_read = gmnal_cb_read; \
                                a->cb_write = gmnal_cb_write; \
-                               a->cb_callback = NULL; \
+                               a->cb_callback = gmnal_cb_callback; \
                                a->cb_malloc = gmnal_cb_malloc; \
                                a->cb_free = gmnal_cb_free; \
                                a->cb_map = NULL; \
@@ -418,6 +422,7 @@ void                gmnal_stop_rxthread(gmnal_data_t *);
 void           gmnal_stop_ctthread(gmnal_data_t *);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 void           gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
+void           gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
 char           *gmnal_gm_error(gm_status_t);
 char           *gmnal_rxevent(gm_recv_event_t*);
 int            gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
index 1cb1317..1442aa7 100644 (file)
 
 #include "gmnal.h"
 
+
+
 gmnal_data_t   *global_nal_data = NULL;
+#define         GLOBAL_NID_STR_LEN      16      /* size in bytes of global_nid_str */
+char            global_nid_str[GLOBAL_NID_STR_LEN] = {0};      /* "%u" text of the global nid, written by snprintf in gmnal_init */
+
+/*
+ *      Publish the global nid read-only at /proc/sys/gmnal/globalnid
+ */
+#define GMNAL_SYSCTL    201     /* ctl id used for the "gmnal" entry of the top-level table */
+#define GMNAL_SYSCTL_GLOBALNID  1       /* ctl id used for the "globalnid" entry */
+
+static ctl_table gmnal_sysctl_table[] = {
+        {GMNAL_SYSCTL_GLOBALNID, "globalnid",
+         global_nid_str, GLOBAL_NID_STR_LEN,    /* string buffer and its size */
+         0444, NULL, &proc_dostring},           /* mode 0444; handled by proc_dostring */
+        { 0 }                                   /* zeroed terminating entry */
+};
+
+
+static ctl_table gmnalnal_top_sysctl_table[] = {       /* passed to register_sysctl_table() in gmnal_init */
+        {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},    /* "gmnal" entry, mode 0555, referring to gmnal_sysctl_table */
+        { 0 }   /* zeroed terminating entry */
+};
+
+
+
+
+
+
 /*
  *     gmnal_api_forward
  *     This function takes a pack block of arguments from the NAL API
@@ -193,8 +222,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        ptl_pid_t       portals_pid = 0;
 
 
-       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], 
-              ac_size[%d]\n", interface, ptl_size, ac_size);
+       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
+              "ac_size[%d]\n", interface, ptl_size, ac_size);
 
 
        PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
@@ -255,8 +284,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
 
 
-       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], 
-                      name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
+       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
+                      "name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
               "gmnal", GM_API_VERSION);
 
        GMNAL_GM_LOCK(nal_data);
@@ -280,15 +309,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                        CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
                        break;
                case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib 
-                              and driver\n");
+                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
+                              "and driver\n");
                        break;
                case(GM_OUT_OF_MEMORY):
                        CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
                        break;
                default:
-                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error 
-                              code [%d]\n", gm_status);
+                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
+                              "code [%d]\n", gm_status);
                        break;
                }       
                GMNAL_GM_LOCK(nal_data);
@@ -403,6 +432,7 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
        nal_data->gm_global_nid = global_nid;
+        snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
 
 /*
        pid = gm_getpid();
@@ -429,6 +459,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                return(NULL);
                
        }
+        nal_data->sysctl = NULL;
+        nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
+
        
        CDEBUG(D_INFO, "gmnal_init finished\n");
        global_nal_data = nal->nal_data;
@@ -459,6 +492,8 @@ void gmnal_fini()
        gm_close(nal_data->gm_port);
        gm_finalize();
        GMNAL_GM_UNLOCK(nal_data);
+        if (nal_data->sysctl)
+                unregister_sysctl_table (nal_data->sysctl);
        PORTAL_FREE(nal, sizeof(nal_t));        
        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
        PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
index e055242..d95922b 100644 (file)
@@ -35,8 +35,8 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], 
-              niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
+       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+              "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
               nal_cb, private, cookie, niov, iov, mlen, rlen);
 
        switch(srxd->type) {
@@ -64,10 +64,11 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
        struct iovec    *iovec = NULL, *iovec_dup = NULL;
        int             i = 0;
+       ptl_kiov_t      *kiov_dup = kiov;;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], 
-              cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+              "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
               nal_cb, private, cookie, kniov, kiov, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
@@ -99,6 +100,10 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
                status = gmnal_small_rx(nal_cb, private, cookie, kniov, 
                                         iovec_dup, mlen, rlen);
+               for (i=0; i<kniov; i++) {
+                       kunmap(kiov_dup->kiov_page);
+                       kiov_dup++;
+               }
                PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
        }
                
@@ -126,6 +131,7 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                niov, iov, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported\n");
+               lib_finalize(nal_cb, private, cookie);
                return(PTL_FAIL);
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
@@ -140,6 +146,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int     i = 0;
        gmnal_data_t    *nal_data;
        struct  iovec   *iovec = NULL, *iovec_dup = NULL;
+       ptl_kiov_t      *kiov_dup = kiov;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
        nal_data = nal_cb->nal_data;
@@ -181,6 +188,10 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, len);
        }
+       for (i=0; i<kniov; i++) {
+               kunmap(kiov_dup->kiov_page);
+               kiov_dup++;
+       }
        PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
        return(PTL_OK);
 }
@@ -199,6 +210,18 @@ int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
        return(PTL_OK);
 }
 
+int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, 
+                      ptl_event_t *ev)
+{
+       /* no handler registered on this event queue: nothing to invoke */
+       if (!eq->event_callback)
+               return PTL_OK;
+
+       CDEBUG(D_INFO, "found callback\n");
+       eq->event_callback(ev);
+       return PTL_OK;
+}
+
 void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
 {
        void *ptr = NULL;
index a0d3530..bdc87f6 100644 (file)
@@ -203,14 +203,14 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        gmnal_msghdr = (gmnal_msghdr_t*)buffer;
        portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
 
-       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], 
-              type [%d], length [%d], buffer [%p]\n",
+       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
+              "type [%d], length [%d], buffer [%p]\n",
               snode, sport, type, length, buffer);
-       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], 
-              gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
+       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
+              "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
               gmnal_msghdr->magic, gmnal_msghdr->type);
-       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], 
-              dest_node ["LPD64"]\n", portals_hdr->src_nid, 
+       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
+              "dest_node ["LPD64"]\n", portals_hdr->src_nid, 
               portals_hdr->dest_nid);
 
        
@@ -321,6 +321,7 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+               lib_finalize(nal_cb, private, cookie);
                return(PTL_FAIL);
        }
 
@@ -342,8 +343,10 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
         *      let portals library know receive is complete
         */
        CDEBUG(D_PORTALS, "calling lib_finalize\n");
-       lib_finalize(nal_cb, private, cookie, PTL_OK);
-
+       if (lib_finalize(nal_cb, private, cookie) != PTL_OK) {
+               /* TO DO what to do with failed lib_finalise? */
+               CDEBUG(D_INFO, "lib_finalize failed\n");
+       }
        /*
         *      return buffer so it can be used again
         */
@@ -377,9 +380,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] 
-              hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] 
-              iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
+              "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
@@ -440,9 +443,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        stxd->msg_size = tot_size;
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, global_nid, local_nid, stxd);
 
        GMNAL_GM_LOCK(nal_data);
@@ -493,8 +496,8 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                /*
                 *      do a resend on the dropped ones
                 */
-                       CDEBUG(D_ERROR, "send stxd [%p] was dropped 
-                              resending\n", context);
+                       CDEBUG(D_ERROR, "send stxd [%p] was dropped "
+                              "resending\n", context);
                        GMNAL_GM_LOCK(nal_data);
                        gm_send_to_peer_with_callback(nal_data->gm_port, 
                                                      stxd->buffer, 
@@ -569,6 +572,11 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                case(GM_YP_NO_MATCH):
                default:
                        CDEBUG(D_ERROR, "Unknown send error\n");
+                gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
+                                      stxd->gm_target_node, GMNAL_GM_PORT,
+                                      gmnal_resume_sending_callback, context);
+                return;
+
        }
 
        /*
@@ -587,11 +595,26 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                return;
        }
        gmnal_return_stxd(nal_data, stxd);
-       lib_finalize(nal_cb, stxd, cookie, PTL_OK);
-
+       if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) {
+               CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", 
+                      stxd);
+       }
        return;
 }
 
+/*
+ *     Callback passed to gm_resume_sending() after an error on the port.
+ *     Logs the status and hands the stxd back via gmnal_return_stxd()
+ */
+void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
+                                 gm_status_t status)
+{
+        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
+
+        CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
+        /* context is the pointer that was given to gm_resume_sending() */
+        gmnal_return_stxd(stxd->nal_data, stxd);
+}
 
 
 void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, 
@@ -611,8 +634,8 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
                                              context);
                GMNAL_GM_LOCK(nal_data);
        } else {
-               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is 
-                      [%d][%s]\n", stxd, status, gmnal_gm_error(status));
+               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+                      "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
        }
 
 
@@ -644,9 +667,9 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             niov_dup;
 
 
-       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] 
-              hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], 
-              iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+              "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
+              "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        if (nal_cb)
@@ -729,8 +752,8 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               iov->iov_base, iov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               iov->iov_base, iov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -806,12 +829,13 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        gmnal_msghdr_t  *msghdr = NULL;
        gm_status_t     gm_status;
 
-       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], 
-              cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+              "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
                nal_cb, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
                CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+               lib_finalize(nal_cb, private, cookie);
                return(PTL_FAIL);
        }
 
@@ -846,8 +870,8 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               riov->iov_base, riov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               riov->iov_base, riov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -902,8 +926,8 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 
        int     ncalls = 0;
 
-       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], 
-              nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
+       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
+              "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
 
 
        ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
@@ -958,8 +982,8 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                            srxd->gm_source_node, 
                                            &source_node) != GM_SUCCESS) {
 
-                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] 
-                              to local node_id\n", srxd->gm_source_node);
+                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+                              "to local node_id\n", srxd->gm_source_node);
                        GMNAL_GM_UNLOCK(nal_data);
                        return(GMNAL_STATUS_FAIL);
                }
@@ -1108,7 +1132,10 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
         *      Let our client application proceed
         */     
        CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
-       lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
+       if (lib_finalize(nal_cb, srxd, srxd->cookie) != PTL_OK) {
+               CDEBUG(D_INFO, "Call to lib_finalize failed for srxd [%p]\n", 
+                      srxd);
+       }
 
        /*
         *      send an ack to the sender to let him know we got the data
@@ -1201,9 +1228,9 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
        stxd->msg_size= sizeof(gmnal_msghdr_t);
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
        GMNAL_GM_LOCK(nal_data);
        stxd->gm_priority = GM_LOW_PRIORITY;
@@ -1273,7 +1300,10 @@ gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 
        CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
 
-       lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
+       if (lib_finalize(nal_cb, stxd, stxd->cookie) != PTL_OK) {
+               CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", 
+                      stxd);
+       }
 
        /*
         *      extract the iovec from the stxd, deregister the memory.
index 1260629..31f6819 100644 (file)
@@ -30,6 +30,7 @@ int gmnal_small_msg_size = 525312;
  */
 int num_rx_threads = -1;
 int num_stxds = 5;
+int gm_port = 4;
 
 ptl_handle_ni_t        kgmnal_ni;
 
@@ -139,6 +140,7 @@ EXPORT_SYMBOL(kgmnal_ni);
 MODULE_PARM(gmnal_small_msg_size, "i");
 MODULE_PARM(num_rx_threads, "i");
 MODULE_PARM(num_stxds, "i");
+MODULE_PARM(gm_port, "i");
 
 MODULE_AUTHOR("Morgan Doyle");
 
index 55606f3..6a52319 100644 (file)
@@ -117,8 +117,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -131,8 +131,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->stxd;
                nal_data->stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        for (i=0; i<=nrxt_stx; i++) {
@@ -146,8 +146,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -160,8 +160,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->rxt_stxd;
                nal_data->rxt_stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        /*
@@ -187,8 +187,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        CDEBUG(D_TRACE, "gmnal_free_small tx\n");
 
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -198,8 +198,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        }
         txd = nal_data->rxt_stxd;
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -392,22 +392,22 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
 #if 0
                PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], 
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
-               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] 
-                      rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
+               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
+                      "rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
                       rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_LOCK(nal_data);
                gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, 
                                               GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],
-                              index [%d]\n", rxbuffer, i);
+                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],"
+                              " index [%d]\n", rxbuffer, i);
                        switch(gm_status) {
                                case(GM_FAILURE):
                                        CDEBUG(D_ERROR, "GM_FAILURE\n");
@@ -432,8 +432,8 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
+                              " size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -447,15 +447,15 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                if (gm_hash_insert(nal_data->srxd_hash, 
                                   (void*)rxbuffer, (void*)rxd)) {
 
-                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] 
-                              for rxbuffer[%p]\n", rxd, rxbuffer);
+                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+                              "for rxbuffer[%p]\n", rxd, rxbuffer);
                        return(GMNAL_STATUS_FAIL);
                }
 
                rxd->next = nal_data->srxd;
                nal_data->srxd = rxd;
-               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], 
-                      size [%d]\n", rxd, rxd->buffer, rxd->size);
+               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
+                      "size [%d]\n", rxd, rxd->buffer, rxd->size);
        }
 
        return(GMNAL_STATUS_OK);
@@ -623,6 +623,8 @@ gmnal_stop_ctthread(gmnal_data_t *nal_data)
 char * 
 gmnal_gm_error(gm_status_t status)
 {
+       return(gm_strerror(status));
+
        switch(status) {
                case(GM_SUCCESS):
                        return("SUCCESS");
@@ -972,7 +974,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                }
                spin_lock(&nal_data->rxtwe_lock);
                if (nal_data->rxtwe_head) {
-                       CDEBUG(D_WARNING, "Got a work entry\n");
+                       CDEBUG(D_INFO, "Got a work entry\n");
                        we = nal_data->rxtwe_head;
                        nal_data->rxtwe_head = we->next;
                        if (!nal_data->rxtwe_head)
@@ -983,7 +985,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                spin_unlock(&nal_data->rxtwe_lock);
        } while (!we);
 
-       CDEBUG(D_WARNING, "Returning we[%p]\n", we);
+       CDEBUG(D_INFO, "Returning we[%p]\n", we);
        return(we);
 }
 
index 84ac97f..ff6631c 100644 (file)
@@ -115,5 +115,5 @@ int main(int argc, char **argv)
        free(pcfg.pcfg_pbuf1);
        close(pfd);
        printf("%u\n", nid);
-        exit(nid);
+        exit(0);
 }
index cdde5b7..ad46b90 100644 (file)
@@ -45,6 +45,7 @@
 #include "linux/init.h"
 #include "linux/sem.h"
 #include "linux/vmalloc.h"
+#include "linux/sysctl.h"
 
 #define DEBUG_SUBSYSTEM S_GMNAL
 
 extern  int gmnal_small_msg_size;
 extern  int num_rx_threads;
 extern  int num_stxds;
+extern  int gm_port;
 #define GMNAL_SMALL_MSG_SIZE(a)                a->small_msg_size
 #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c)        gmnal_is_small_msg(n, a, b, c)
 #define GMNAL_MAGIC                            0x1234abcd
+/*
+ *     The gm_port to use for gmnal
+ */
+#define GMNAL_GM_PORT  gm_port
 
 
 /*
@@ -218,6 +224,7 @@ typedef struct _gmnal_data_t {
        gmnal_rxtwe_t   *rxtwe_tail;
        spinlock_t      rxtwe_lock;
        struct  semaphore rxtwe_wait;
+        struct ctl_table_header *sysctl;
 } gmnal_data_t;
 
 /*
@@ -234,11 +241,6 @@ typedef struct _gmnal_data_t {
 extern gmnal_data_t    *global_nal_data;
 
 /*
- *     The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT  4
-
-/*
  * for ioctl get pid
  */
 #define GMNAL_IOC_GET_GNID 1   
@@ -353,6 +355,8 @@ int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
 
 int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
 
+int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
+
 void *gmnal_cb_malloc(nal_cb_t *, size_t);
 
 void gmnal_cb_free(nal_cb_t *, void *, size_t);
@@ -382,7 +386,7 @@ void  gmnal_fini(void);
                                a->cb_recv_pages = gmnal_cb_recv_pages; \
                                a->cb_read = gmnal_cb_read; \
                                a->cb_write = gmnal_cb_write; \
-                               a->cb_callback = NULL; \
+                               a->cb_callback = gmnal_cb_callback; \
                                a->cb_malloc = gmnal_cb_malloc; \
                                a->cb_free = gmnal_cb_free; \
                                a->cb_map = NULL; \
@@ -418,6 +422,7 @@ void                gmnal_stop_rxthread(gmnal_data_t *);
 void           gmnal_stop_ctthread(gmnal_data_t *);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 void           gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
+void           gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
 char           *gmnal_gm_error(gm_status_t);
 char           *gmnal_rxevent(gm_recv_event_t*);
 int            gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
index 1cb1317..1442aa7 100644 (file)
 
 #include "gmnal.h"
 
+
+
 gmnal_data_t   *global_nal_data = NULL;
+#define         GLOBAL_NID_STR_LEN      16
+char            global_nid_str[GLOBAL_NID_STR_LEN] = {0};
+
+/*
+ *      Publish the global nid, read-only, as /proc/sys/gmnal/globalnid
+ */
+#define GMNAL_SYSCTL    201
+#define GMNAL_SYSCTL_GLOBALNID  1
+
+static ctl_table gmnal_sysctl_table[] = {
+        {GMNAL_SYSCTL_GLOBALNID, "globalnid",
+         global_nid_str, GLOBAL_NID_STR_LEN,
+         0444, NULL, &proc_dostring},
+        { 0 }
+};
+
+
+static ctl_table gmnalnal_top_sysctl_table[] = {
+        {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
+        { 0 }
+};
+
+
+
+
+
+
 /*
  *     gmnal_api_forward
  *     This function takes a pack block of arguments from the NAL API
@@ -193,8 +222,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        ptl_pid_t       portals_pid = 0;
 
 
-       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], 
-              ac_size[%d]\n", interface, ptl_size, ac_size);
+       CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
+              "ac_size[%d]\n", interface, ptl_size, ac_size);
 
 
        PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
@@ -255,8 +284,8 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
 
 
-       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], 
-                      name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
+       CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
+                      "name [%s], version [%d]\n", interface, GMNAL_GM_PORT, 
               "gmnal", GM_API_VERSION);
 
        GMNAL_GM_LOCK(nal_data);
@@ -280,15 +309,15 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                        CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
                        break;
                case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib 
-                              and driver\n");
+                       CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
+                              "and driver\n");
                        break;
                case(GM_OUT_OF_MEMORY):
                        CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
                        break;
                default:
-                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error 
-                              code [%d]\n", gm_status);
+                       CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
+                              "code [%d]\n", gm_status);
                        break;
                }       
                GMNAL_GM_LOCK(nal_data);
@@ -403,6 +432,7 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
        }
        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
        nal_data->gm_global_nid = global_nid;
+        snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
 
 /*
        pid = gm_getpid();
@@ -429,6 +459,9 @@ gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
                return(NULL);
                
        }
+        nal_data->sysctl = NULL;
+        nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
+
        
        CDEBUG(D_INFO, "gmnal_init finished\n");
        global_nal_data = nal->nal_data;
@@ -459,6 +492,8 @@ void gmnal_fini()
        gm_close(nal_data->gm_port);
        gm_finalize();
        GMNAL_GM_UNLOCK(nal_data);
+        if (nal_data->sysctl)
+                unregister_sysctl_table (nal_data->sysctl);
        PORTAL_FREE(nal, sizeof(nal_t));        
        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
        PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
index e055242..d95922b 100644 (file)
@@ -35,8 +35,8 @@ int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], 
-              niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
+       CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
+              "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", 
               nal_cb, private, cookie, niov, iov, mlen, rlen);
 
        switch(srxd->type) {
@@ -64,10 +64,11 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             status = PTL_OK;
        struct iovec    *iovec = NULL, *iovec_dup = NULL;
        int             i = 0;
+       ptl_kiov_t      *kiov_dup = kiov;
 
 
-       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], 
-              cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
+              "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
               nal_cb, private, cookie, kniov, kiov, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
@@ -99,6 +100,10 @@ int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
                status = gmnal_small_rx(nal_cb, private, cookie, kniov, 
                                         iovec_dup, mlen, rlen);
+               for (i=0; i<kniov; i++) {
+                       kunmap(kiov_dup->kiov_page);
+                       kiov_dup++;
+               }
                PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
        }
                
@@ -126,6 +131,7 @@ int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                niov, iov, len);
        } else {
                CDEBUG(D_ERROR, "Large message send it is not supported\n");
+               lib_finalize(nal_cb, private, cookie);
                return(PTL_FAIL);
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, 
                                niov, iov, len);
@@ -140,6 +146,7 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int     i = 0;
        gmnal_data_t    *nal_data;
        struct  iovec   *iovec = NULL, *iovec_dup = NULL;
+       ptl_kiov_t      *kiov_dup = kiov;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
        nal_data = nal_cb->nal_data;
@@ -181,6 +188,10 @@ int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, len);
        }
+       for (i=0; i<kniov; i++) {
+               kunmap(kiov_dup->kiov_page);
+               kiov_dup++;
+       }
        PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
        return(PTL_OK);
 }
@@ -199,6 +210,18 @@ int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
        return(PTL_OK);
 }
 
+/*
+ *     Pass an event to the callback attached to its event queue, if one
+ *     is set.  nal_cb and private are not used here.
+ *     Always returns PTL_OK.
+ */
+int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, 
+                      ptl_event_t *ev)
+{
+       /* nothing registered on this queue: nothing to deliver */
+       if (eq->event_callback == NULL)
+               return(PTL_OK);
+
+       CDEBUG(D_INFO, "found callback\n");
+       eq->event_callback(ev);
+
+       return(PTL_OK);
+}
+
 void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
 {
        void *ptr = NULL;
index a0d3530..bdc87f6 100644 (file)
@@ -203,14 +203,14 @@ gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
        gmnal_msghdr = (gmnal_msghdr_t*)buffer;
        portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
 
-       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], 
-              type [%d], length [%d], buffer [%p]\n",
+       CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
+              "type [%d], length [%d], buffer [%p]\n",
               snode, sport, type, length, buffer);
-       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], 
-              gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
+       CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
+              "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, 
               gmnal_msghdr->magic, gmnal_msghdr->type);
-       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], 
-              dest_node ["LPD64"]\n", portals_hdr->src_nid, 
+       CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
+              "dest_node ["LPD64"]\n", portals_hdr->src_nid, 
               portals_hdr->dest_nid);
 
        
@@ -321,6 +321,7 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
 
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
+               lib_finalize(nal_cb, private, cookie);
                return(PTL_FAIL);
        }
 
@@ -342,8 +343,10 @@ gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
         *      let portals library know receive is complete
         */
        CDEBUG(D_PORTALS, "calling lib_finalize\n");
-       lib_finalize(nal_cb, private, cookie, PTL_OK);
-
+       if (lib_finalize(nal_cb, private, cookie) != PTL_OK) {
+               /* TODO: decide how a failed lib_finalize should be handled */
+               CDEBUG(D_INFO, "lib_finalize failed\n");
+       }
        /*
         *      return buffer so it can be used again
         */
@@ -377,9 +380,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        unsigned int    local_nid;
        gm_status_t     gm_status = GM_SUCCESS;
 
-       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] 
-              hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] 
-              iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
+              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
+              "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
@@ -440,9 +443,9 @@ gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        stxd->msg_size = tot_size;
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, global_nid, local_nid, stxd);
 
        GMNAL_GM_LOCK(nal_data);
@@ -493,8 +496,8 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                /*
                 *      do a resend on the dropped ones
                 */
-                       CDEBUG(D_ERROR, "send stxd [%p] was dropped 
-                              resending\n", context);
+                       CDEBUG(D_ERROR, "send stxd [%p] was dropped "
+                              "resending\n", context);
                        GMNAL_GM_LOCK(nal_data);
                        gm_send_to_peer_with_callback(nal_data->gm_port, 
                                                      stxd->buffer, 
@@ -569,6 +572,11 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                case(GM_YP_NO_MATCH):
                default:
                        CDEBUG(D_ERROR, "Unknown send error\n");
+                gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
+                                      stxd->gm_target_node, GMNAL_GM_PORT,
+                                      gmnal_resume_sending_callback, context);
+                return;
+
        }
 
        /*
@@ -587,11 +595,26 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                return;
        }
        gmnal_return_stxd(nal_data, stxd);
-       lib_finalize(nal_cb, stxd, cookie, PTL_OK);
-
+       if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) {
+               CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", 
+                      stxd);
+       }
        return;
 }
 
+/*
+ *     Callback passed to gm_resume_sending() after a send error on the
+ *     port, so that future sends can complete.
+ *     context is the gmnal_stxd_t of the failed send; it is handed back
+ *     through gmnal_return_stxd().  gm_port is unused and status is only
+ *     logged.
+ *     NOTE(review): the cookie of the failed send is not passed to
+ *     lib_finalize on this path - confirm whether that is intended.
+ */
+void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
+                                 gm_status_t status)
+{
+        gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
+
+        CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
+        gmnal_return_stxd(stxd->nal_data, stxd);
+}
 
 
 void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, 
@@ -611,8 +634,8 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
                                              context);
                GMNAL_GM_LOCK(nal_data);
        } else {
-               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is 
-                      [%d][%s]\n", stxd, status, gmnal_gm_error(status));
+               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
+                      "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
        }
 
 
@@ -644,9 +667,9 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        int             niov_dup;
 
 
-       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] 
-              hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], 
-              iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
+       CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
+              "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
+              "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, 
               global_nid, pid, niov, iov, size);
 
        if (nal_cb)
@@ -729,8 +752,8 @@ gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               iov->iov_base, iov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               iov->iov_base, iov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -806,12 +829,13 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
        gmnal_msghdr_t  *msghdr = NULL;
        gm_status_t     gm_status;
 
-       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], 
-              cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
+       CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
+              "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
                nal_cb, private, cookie, nriov, riov, mlen, rlen);
 
        if (!srxd) {
                CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
+               lib_finalize(nal_cb, private, cookie);
                return(PTL_FAIL);
        }
 
@@ -846,8 +870,8 @@ gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
                                               riov->iov_base, riov->iov_len);
                if (gm_status != GM_SUCCESS) {
                        GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] 
-                              for memory [%p] len ["LPSZ"]\n", 
+                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
+                              "for memory [%p] len ["LPSZ"]\n", 
                               gm_status, gmnal_gm_error(gm_status), 
                               riov->iov_base, riov->iov_len);
                        GMNAL_GM_LOCK(nal_data);
@@ -902,8 +926,8 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 
        int     ncalls = 0;
 
-       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], 
-              nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
+       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
+              "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
 
 
        ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
@@ -958,8 +982,8 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                            srxd->gm_source_node, 
                                            &source_node) != GM_SUCCESS) {
 
-                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] 
-                              to local node_id\n", srxd->gm_source_node);
+                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
+                              "to local node_id\n", srxd->gm_source_node);
                        GMNAL_GM_UNLOCK(nal_data);
                        return(GMNAL_STATUS_FAIL);
                }
@@ -1108,7 +1132,10 @@ gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
         *      Let our client application proceed
         */     
        CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
-       lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
+       if (lib_finalize(nal_cb, srxd, srxd->cookie) != PTL_OK) {
+               CDEBUG(D_INFO, "Call to lib_finalize failed for srxd [%p]\n", 
+                      srxd);
+       }
 
        /*
         *      send an ack to the sender to let him know we got the data
@@ -1201,9 +1228,9 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
        stxd->msg_size= sizeof(gmnal_msghdr_t);
 
 
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] 
-              gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] 
-              stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
+       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
+              "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
+              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
               stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
        GMNAL_GM_LOCK(nal_data);
        stxd->gm_priority = GM_LOW_PRIORITY;
@@ -1273,7 +1300,10 @@ gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 
        CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
 
-       lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
+       if (lib_finalize(nal_cb, stxd, stxd->cookie) != PTL_OK) {
+               CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", 
+                      stxd);
+       }
 
        /*
         *      extract the iovec from the stxd, deregister the memory.
index 1260629..31f6819 100644 (file)
@@ -30,6 +30,7 @@ int gmnal_small_msg_size = 525312;
  */
 int num_rx_threads = -1;
 int num_stxds = 5;
+int gm_port = 4;
 
 ptl_handle_ni_t        kgmnal_ni;
 
@@ -139,6 +140,7 @@ EXPORT_SYMBOL(kgmnal_ni);
 MODULE_PARM(gmnal_small_msg_size, "i");
 MODULE_PARM(num_rx_threads, "i");
 MODULE_PARM(num_stxds, "i");
+MODULE_PARM(gm_port, "i");
 
 MODULE_AUTHOR("Morgan Doyle");
 
index 55606f3..6a52319 100644 (file)
@@ -117,8 +117,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -131,8 +131,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->stxd;
                nal_data->stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        for (i=0; i<=nrxt_stx; i++) {
@@ -146,8 +146,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -160,8 +160,8 @@ gmnal_alloc_txd(gmnal_data_t *nal_data)
 
                txd->next = nal_data->rxt_stxd;
                nal_data->rxt_stxd = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
        }
 
        /*
@@ -187,8 +187,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        CDEBUG(D_TRACE, "gmnal_free_small tx\n");
 
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -198,8 +198,8 @@ gmnal_free_txd(gmnal_data_t *nal_data)
        }
         txd = nal_data->rxt_stxd;
        while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], 
-                      size [%d]\n", txd, txd->buffer, txd->buffer_size);
+               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
+                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
                _txd = txd;
                txd = txd->next;
                GMNAL_GM_LOCK(nal_data);
@@ -392,22 +392,22 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
 #if 0
                PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], 
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
                }
-               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] 
-                      rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
+               CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
+                      "rxbuffer [%p], size [%d]\n", nal_data->gm_port, 
                       rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_LOCK(nal_data);
                gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, 
                                               GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],
-                              index [%d]\n", rxbuffer, i);
+                       CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p], "
+                              "index [%d]\n", rxbuffer, i);
                        switch(gm_status) {
                                case(GM_FAILURE):
                                        CDEBUG(D_ERROR, "GM_FAILURE\n");
@@ -432,8 +432,8 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                                         GMNAL_SMALL_MSG_SIZE(nal_data));
                GMNAL_GM_UNLOCK(nal_data);
                if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],
-                              size [%d]\n", i, 
+                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d], "
+                              "size [%d]\n", i, 
                               GMNAL_SMALL_MSG_SIZE(nal_data));
                        PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
                        return(GMNAL_STATUS_FAIL);
@@ -447,15 +447,15 @@ gmnal_alloc_srxd(gmnal_data_t *nal_data)
                if (gm_hash_insert(nal_data->srxd_hash, 
                                   (void*)rxbuffer, (void*)rxd)) {
 
-                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] 
-                              for rxbuffer[%p]\n", rxd, rxbuffer);
+                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
+                              "for rxbuffer[%p]\n", rxd, rxbuffer);
                        return(GMNAL_STATUS_FAIL);
                }
 
                rxd->next = nal_data->srxd;
                nal_data->srxd = rxd;
-               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], 
-                      size [%d]\n", rxd, rxd->buffer, rxd->size);
+               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
+                      "size [%d]\n", rxd, rxd->buffer, rxd->size);
        }
 
        return(GMNAL_STATUS_OK);
@@ -623,6 +623,8 @@ gmnal_stop_ctthread(gmnal_data_t *nal_data)
 char * 
 gmnal_gm_error(gm_status_t status)
 {
+       return(gm_strerror(status));
+
        switch(status) {
                case(GM_SUCCESS):
                        return("SUCCESS");
@@ -972,7 +974,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                }
                spin_lock(&nal_data->rxtwe_lock);
                if (nal_data->rxtwe_head) {
-                       CDEBUG(D_WARNING, "Got a work entry\n");
+                       CDEBUG(D_INFO, "Got a work entry\n");
                        we = nal_data->rxtwe_head;
                        nal_data->rxtwe_head = we->next;
                        if (!nal_data->rxtwe_head)
@@ -983,7 +985,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
                spin_unlock(&nal_data->rxtwe_lock);
        } while (!we);
 
-       CDEBUG(D_WARNING, "Returning we[%p]\n", we);
+       CDEBUG(D_INFO, "Returning we[%p]\n", we);
        return(we);
 }
 
index 84ac97f..ff6631c 100644 (file)
@@ -115,5 +115,5 @@ int main(int argc, char **argv)
        free(pcfg.pcfg_pbuf1);
        close(pfd);
        printf("%u\n", nid);
-        exit(nid);
+        exit(0);
 }