Whamcloud - gitweb
Some, but not all, of the GMnal updates submitted by HP.
author: phil <phil>
Tue, 1 Mar 2005 00:35:07 +0000 (00:35 +0000)
committer: phil <phil>
Tue, 1 Mar 2005 00:35:07 +0000 (00:35 +0000)
b=5786
gmnal_kernel_changes.patch:
Info:
        Kernel code changes for updated GMNAL as developed by Morgan
        Doyle and maintained in the HP SFS product release.
gmnal_utils_changes.patch:
Info:
        Userspace utils changes for updated GMNAL as developed by Morgan
        Doyle and maintained in the HP SFS product release.

lnet/klnds/gmlnd/gmlnd.h
lnet/klnds/gmlnd/gmlnd_api.c
lnet/klnds/gmlnd/gmlnd_cb.c
lnet/klnds/gmlnd/gmlnd_comm.c
lnet/klnds/gmlnd/gmlnd_module.c
lnet/klnds/gmlnd/gmlnd_utils.c
lnet/utils/gmlndnid.c

index a3492f4..a75f6b9 100644 (file)
 extern  int gmnal_small_msg_size;
 extern  int num_rx_threads;
 extern  int num_stxds;
-extern  int gm_port;
+extern  int gm_port_id;
 #define GMNAL_SMALL_MSG_SIZE(a)                a->small_msg_size
 #define GMNAL_IS_SMALL_MESSAGE(n,a,b,c)        gmnal_is_small_msg(n, a, b, c)
 #define GMNAL_MAGIC                            0x1234abcd
 /*
  *     The gm_port to use for gmnal
  */
-#define GMNAL_GM_PORT  gm_port
+#define GMNAL_GM_PORT_ID       gm_port_id
 
 
 /*
@@ -156,13 +156,14 @@ typedef struct _gmnal_srxd_t {
 
 /*
  *     Header which lmgnal puts at the start of each message
+ *     watch alignment for ia32/64 interaction
  */
 typedef struct _gmnal_msghdr {
        int             magic;
        int             type;
        unsigned int    sender_node_id;
-       gmnal_stxd_t    *stxd;
        int             niov;
+       gm_remote_ptr_t stxd_remote_ptr; /* 64 bits */
        } gmnal_msghdr_t;
 #define GMNAL_MSGHDR_SIZE      sizeof(gmnal_msghdr_t)
 
@@ -193,6 +194,8 @@ typedef struct _gmnal_rxtwe {
 #define NRXTHREADS 10 /* max number of receiver threads */
 
 typedef struct _gmnal_data_t {
+       int             refcnt;
+       spinlock_t      cb_lock;
        spinlock_t      stxd_lock;
        struct semaphore stxd_token;
        gmnal_stxd_t    *stxd;
@@ -300,6 +303,7 @@ extern gmnal_data_t *global_nal_data;
 #define GMNAL_GM_LOCK_INIT(a)          spin_lock_init(&a->gm_lock);
 #define GMNAL_GM_LOCK(a)               spin_lock(&a->gm_lock);
 #define GMNAL_GM_UNLOCK(a)             spin_unlock(&a->gm_lock);
+#define GMNAL_CB_LOCK_INIT(a)          spin_lock_init(&a->cb_lock);
 
 
 /*
@@ -389,7 +393,6 @@ void                gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *);
 gmnal_srxd_t   *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*);
 void           gmnal_stop_rxthread(gmnal_data_t *);
 void           gmnal_stop_ctthread(gmnal_data_t *);
-void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 void           gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
 void           gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
 char           *gmnal_gm_error(gm_status_t);
@@ -419,11 +422,10 @@ void              gmnal_remove_rxtwe(gmnal_data_t *);
 /*
  *     Small messages
  */
-int            gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
-                               struct iovec *, size_t, size_t, size_t);
-int            gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
+ptl_err_t      gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *);
+ptl_err_t      gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
                                int, ptl_nid_t, ptl_pid_t, 
-                               unsigned int, struct iovec*, size_t, int);
+                               gmnal_stxd_t*, int);
 void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
 
 
index bd6c83e..bd09d78 100644 (file)
@@ -175,11 +175,11 @@ gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
 
 
        CDEBUG(D_NET, "Calling gm_open with port [%d], "
-                      "name [%s], version [%d]\n", GMNAL_GM_PORT, 
+                      "name [%s], version [%d]\n", GMNAL_GM_PORT_ID
               "gmnal", GM_API_VERSION);
 
        GMNAL_GM_LOCK(nal_data);
-       gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal", 
+       gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT_ID, "gmnal", 
                            GM_API_VERSION);
        GMNAL_GM_UNLOCK(nal_data);
 
index 0ebf437..e19995c 100644 (file)
@@ -31,10 +31,10 @@ ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
                   unsigned int niov, struct iovec *iov, size_t offset, 
                   size_t mlen, size_t rlen)
 {
+   void            *buffer = NULL;
        gmnal_srxd_t    *srxd = (gmnal_srxd_t*)private;
        int             status = PTL_OK;
 
-
        CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
               "niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n", 
               libnal, private, cookie, niov, iov, offset, mlen, rlen);
@@ -42,8 +42,34 @@ ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
        switch(srxd->type) {
        case(GMNAL_SMALL_MESSAGE):
                CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
-               status = gmnal_small_rx(libnal, private, cookie, niov, 
-                                        iov, offset, mlen, rlen);
+               /* HP SFS 1380: Proactively change receives to avoid a receive
+                *  side occurrence of filling pkmap_count[].
+                */
+               buffer = srxd->buffer; 
+               buffer += sizeof(gmnal_msghdr_t);
+               buffer += sizeof(ptl_hdr_t);
+
+               while(niov--) { 
+                       if (offset >= iov->iov_len) {
+                               offset -= iov->iov_len;
+                       } else if (offset > 0) {
+                               CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
+                                      "offset %d, len ["LPSZ"]\n", iov,
+                               iov->iov_base + offset, iov->iov_len, offset,
+                               iov->iov_len - offset);
+                               gm_bcopy(buffer, iov->iov_base + offset,
+                                        iov->iov_len - offset);
+                               buffer += iov->iov_len - offset;
+                               offset = 0;
+                       } else {
+                               CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
+                                      iov->iov_len);
+                               gm_bcopy(buffer, iov->iov_base, iov->iov_len);
+                               buffer += iov->iov_len;
+                       }
+                       iov++;
+               }
+               status = gmnal_small_rx(libnal, private, cookie);
        break;
        case(GMNAL_LARGE_MESSAGE_INIT):
                CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
@@ -62,9 +88,8 @@ ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cooki
 {
        gmnal_srxd_t    *srxd = (gmnal_srxd_t*)private;
        int             status = PTL_OK;
-       struct iovec    *iovec = NULL, *iovec_dup = NULL;
-       int             i = 0;
-       ptl_kiov_t      *kiov_dup = kiov;;
+       char            *ptr = NULL;
+       void            *buffer = NULL;
 
 
        CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
@@ -72,39 +97,50 @@ ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cooki
               libnal, private, cookie, kniov, kiov, offset, mlen, rlen);
 
        if (srxd->type == GMNAL_SMALL_MESSAGE) {
-               PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
-               if (!iovec) {
-                       CDEBUG(D_ERROR, "Can't malloc\n");
-                       return(GMNAL_STATUS_FAIL);
-               }
-                iovec_dup = iovec;
+               buffer = srxd->buffer; 
+               buffer += sizeof(gmnal_msghdr_t);
+               buffer += sizeof(ptl_hdr_t);
 
                /*
                 *      map each page and create an iovec for it
                 */
-               for (i=0; i<kniov; i++) {
-                       CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
-                       CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-                              kiov->kiov_page, kiov->kiov_len, 
-                              kiov->kiov_offset);
-                       iovec->iov_len = kiov->kiov_len;
-                       CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page);
+               while (kniov--) {
+                       /* HP SFS 1380: Proactively change receives to avoid a receive
+                        *  side occurrence of filling pkmap_count[].
+                        */
+                       CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", kniov, kiov);
 
-                       iovec->iov_base = kmap(kiov->kiov_page) + 
-                                                 kiov->kiov_offset;
+                       if (offset >= kiov->kiov_len) {
+                               offset -= kiov->kiov_len;
+                       } else {
+                               CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
+                                      kiov->kiov_page, kiov->kiov_len, 
+                                      kiov->kiov_offset);
+                               CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page);
+                               ptr = ((char *)kmap(kiov->kiov_page)) + kiov->kiov_offset;
 
-                       CDEBUG(D_INFO, "iov_base is [%p]\n", iovec->iov_base);
-                        iovec++;
+                               if (offset > 0) {
+                                       CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
+                                              "offset %d, len ["LPSZ"]\n", ptr,
+                                              ptr + offset, kiov->kiov_len, offset,
+                                              kiov->kiov_len - offset);
+                                       gm_bcopy(buffer, ptr + offset,
+                                              kiov->kiov_len - offset);
+                                       buffer += kiov->kiov_len - offset;
+                                       offset = 0;
+                               } else {
+                                       CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", ptr,
+                                              kiov->kiov_len);
+                                       gm_bcopy(buffer, ptr, kiov->kiov_len);
+                                       buffer += kiov->kiov_len;
+                               }
+                               kunmap(kiov->kiov_page);
+                               CDEBUG(D_INFO, "Stored in [%p]\n", ptr);
+                        }
                         kiov++;
                }
                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
-               status = gmnal_small_rx(libnal, private, cookie, kniov, 
-                                        iovec_dup, offset, mlen, rlen);
-               for (i=0; i<kniov; i++) {
-                       kunmap(kiov_dup->kiov_page);
-                       kiov_dup++;
-               }
-               PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
+               status = gmnal_small_rx(libnal, private, cookie);
        }
                
 
@@ -119,18 +155,51 @@ ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
 {
 
        gmnal_data_t    *nal_data;
+       void            *buffer = NULL;
+       gmnal_stxd_t    *stxd = NULL;
 
 
        CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ"] nid["LPU64"]\n", 
               niov, offset, len, nid);
        nal_data = libnal->libnal_data;
+       if (!nal_data) {
+               CDEBUG(D_ERROR, "no nal_data\n");
+               return(PTL_FAIL);
+       } else {
+               CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
+       }
        
        if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
                CDEBUG(D_INFO, "This is a small message send\n");
+               /*
+                * HP SFS 1380: With the change to gmnal_small_tx, need to get the stxd
+                * and do relevant setup here
+                */
+               stxd = gmnal_get_stxd(nal_data, 1);
+               CDEBUG(D_INFO, "stxd [%p]\n", stxd);
+               /* Set the offset of the data to copy into the buffer */
+               buffer = stxd->buffer + sizeof(gmnal_msghdr_t) + sizeof(ptl_hdr_t);
+               while(niov--) {
+                       if (offset >= iov->iov_len) {
+                               offset -= iov->iov_len;
+                       } else if (offset > 0) {
+                               CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n",
+                                      iov, iov->iov_base + offset, iov->iov_len - offset, buffer);
+                               gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset);
+                               buffer+= iov->iov_len - offset;
+                               offset = 0;
+                       } else {
+                               CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
+                                      iov, iov->iov_len, buffer);
+                               gm_bcopy(iov->iov_base, buffer, iov->iov_len);
+                               buffer+= iov->iov_len;
+                       }
+                       iov++;
+               }
                gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid, 
-                               niov, iov, offset,  len);
+                              stxd,  len);
        } else {
-               CDEBUG(D_ERROR, "Large message send it is not supported\n");
+               CDEBUG(D_ERROR, "Large message send is not supported\n");
                lib_finalize(libnal, private, cookie, PTL_FAIL);
                return(PTL_FAIL);
                gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid, 
@@ -144,36 +213,75 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cooki
                          unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len)
 {
 
-       int     i = 0;
        gmnal_data_t    *nal_data;
-       struct  iovec   *iovec = NULL, *iovec_dup = NULL;
-       ptl_kiov_t      *kiov_dup = kiov;
+       char            *ptr;
+       void            *buffer = NULL;
+       gmnal_stxd_t    *stxd = NULL;
+       ptl_err_t       status = PTL_OK;
 
        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset["LPSZ"] len["LPSZ"]\n", 
                nid, kniov, offset, len);
        nal_data = libnal->libnal_data;
-       PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
-        iovec_dup = iovec;
+       if (!nal_data) {
+               CDEBUG(D_ERROR, "no nal_data\n");
+               return(PTL_FAIL);
+       } else {
+               CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
+       }
+
+       /* HP SFS 1380: Need to do the gm_bcopy after the kmap so we can kunmap
+        * more aggressively.  This is the fix for a livelock situation under load
+        * on ia32 that occurs when there are no more available entries in the
+        * pkmap_count array.  Just fill the buffer and let gmnal_small_tx
+        * put the headers in after we pass it the stxd pointer.
+        */
+       stxd = gmnal_get_stxd(nal_data, 1);
+       CDEBUG(D_INFO, "stxd [%p]\n", stxd);
+       /* Set the offset of the data to copy into the buffer */
+       buffer = stxd->buffer + sizeof(gmnal_msghdr_t) + sizeof(ptl_hdr_t);
+
        if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
                CDEBUG(D_INFO, "This is a small message send\n");
                
-               for (i=0; i<kniov; i++) {
-                       CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
-                       CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-                              kiov->kiov_page, kiov->kiov_len, 
-                              kiov->kiov_offset);
+               while(kniov--) {
+                       CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", kniov, kiov);
+                       if (offset >= kiov->kiov_len) {
+                               offset -= kiov->kiov_len;
+                       } else {
+                               CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
+                                      kiov->kiov_page, kiov->kiov_len, 
+                                      kiov->kiov_offset);
 
-                       iovec->iov_base = kmap(kiov->kiov_page) 
-                                               + kiov->kiov_offset;
+                               ptr = ((char *)kmap(kiov->kiov_page)) + kiov->kiov_offset;
 
-                       iovec->iov_len = kiov->kiov_len;
-                        iovec++;
+                               if (offset > 0) {
+                                       CDEBUG(D_INFO, "processing [%p] base [%p] len ["LPSZ"] to [%p]\n",
+                                              ptr, ptr + offset, kiov->kiov_len - offset, buffer);
+                                       gm_bcopy(ptr + offset, buffer, kiov->kiov_len - offset);
+                                       buffer+= kiov->kiov_len - offset;
+                                       offset = 0;
+                               } else {
+                                       CDEBUG(D_INFO, "processing kmapped [%p] len ["LPSZ"] to [%p]\n",
+                                              ptr, kiov->kiov_len, buffer);
+                                       gm_bcopy(ptr, buffer, kiov->kiov_len);
+
+                                       buffer += kiov->kiov_len;
+                               }
+                               kunmap(kiov->kiov_page);
+                       }
                         kiov++;
                }
-               gmnal_small_tx(libnal, private, cookie, hdr, type, nid, 
-                               pid, kniov, iovec_dup, offset, len);
+               status = gmnal_small_tx(libnal, private, cookie, hdr, type, nid, 
+                                       pid, stxd, len);
        } else {
+               int     i = 0;
+               struct  iovec   *iovec = NULL, *iovec_dup = NULL;
+               ptl_kiov_t *kiov_dup = kiov;
+
+               PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
+               iovec_dup = iovec;
                CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
+               PORTAL_FREE(iovec, kniov*sizeof(struct iovec));
                return(PTL_FAIL);
                for (i=0; i<kniov; i++) {
                        CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
@@ -189,13 +297,13 @@ ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cooki
                }
                gmnal_large_tx(libnal, private, cookie, hdr, type, nid, 
                                pid, kniov, iovec, offset, len);
+               for (i=0; i<kniov; i++) {
+                       kunmap(kiov_dup->kiov_page);
+                       kiov_dup++;
+               }
+               PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
        }
-       for (i=0; i<kniov; i++) {
-               kunmap(kiov_dup->kiov_page);
-               kiov_dup++;
-       }
-       PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
-       return(PTL_OK);
+       return(status);
 }
 
 int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
index 6a8fcbc..aea2967 100644 (file)
@@ -48,6 +48,8 @@ gmnal_ct_thread(void *arg)
        nal_data = (gmnal_data_t*)arg;
        CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
 
+       sprintf(current->comm, "gmnal_ct");
+
        daemonize();
 
        nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED;
@@ -113,6 +115,7 @@ int gmnal_rx_thread(void *arg)
        gmnal_data_t            *nal_data;
        void                    *buffer;
        gmnal_rxtwe_t           *we = NULL;
+       int                     rank;
 
        if (!arg) {
                CDEBUG(D_TRACE, "NO nal_data. Exiting\n");
@@ -122,6 +125,12 @@ int gmnal_rx_thread(void *arg)
        nal_data = (gmnal_data_t*)arg;
        CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
 
+       for (rank=0; rank<num_rx_threads; rank++) 
+               if (nal_data->rxthread_pid[rank] == current->pid)
+                       break;
+
+       sprintf(current->comm, "gmnal_rx_%d", rank);
+
        daemonize();
        /*
         *      set 1 bit for each thread started
@@ -317,17 +326,13 @@ gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd)
  *     Hang out the receive buffer again for another receive
  *     Call lib_finalize
  */
-int
-gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-               unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen)
+ptl_err_t
+gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie)
 {
        gmnal_srxd_t    *srxd = NULL;
-       void    *buffer = NULL;
        gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->libnal_data;
 
 
-       CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
-
        if (!private) {
                CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
                lib_finalize(libnal, private, cookie, PTL_FAIL);
@@ -335,31 +340,6 @@ gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
        }
 
        srxd = (gmnal_srxd_t*)private;
-       buffer = srxd->buffer;
-       buffer += sizeof(gmnal_msghdr_t);
-       buffer += sizeof(ptl_hdr_t);
-
-       while(niov--) {
-                if (offset >= iov->iov_len) {
-                        offset -= iov->iov_len;
-                } else if (offset > 0) {
-                       CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
-                               "offset %d, len ["LPSZ"]\n", iov,
-                              iov->iov_base + offset, iov->iov_len, offset,
-                               iov->iov_len - offset);
-                       gm_bcopy(buffer, iov->iov_base + offset,
-                                 iov->iov_len - offset);
-                        offset = 0;
-                        buffer += iov->iov_len - offset;
-                } else {
-                       CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
-                              iov->iov_len);
-                       gm_bcopy(buffer, iov->iov_base, iov->iov_len);
-                       buffer += iov->iov_len;
-                }
-                iov++;
-       }
-
 
        /*
         *      let portals library know receive is complete
@@ -381,18 +361,16 @@ gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
 
 /*
  *     Start a small transmit. 
- *     Get a send token (and wired transmit buffer).
- *     Copy data from senders buffer to wired buffer and
- *     initiate gm_send from the wired buffer.
+ *     Use the given send token (and wired transmit buffer).
+ *     Copy headers to wired buffer and initiate gm_send from the wired buffer.
  *     The callback function informs when the send is complete.
  */
-int
+ptl_err_t
 gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
                ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
-               unsigned int niov, struct iovec *iov, size_t offset, int size)
+               gmnal_stxd_t *stxd, int size)
 {
        gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->libnal_data;
-       gmnal_stxd_t    *stxd = NULL;
        void            *buffer = NULL;
        gmnal_msghdr_t  *msghdr = NULL;
        int             tot_size = 0;
@@ -400,16 +378,16 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
        gm_status_t     gm_status = GM_SUCCESS;
 
        CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
-              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
-              "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type, 
-              global_nid, pid, niov, iov, size);
+              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] stxd [%p] "
+              "size [%d]\n", libnal, private, cookie, hdr, type, 
+              global_nid, pid, stxd, size);
 
        CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
               hdr->dest_nid, hdr->src_nid);
 
        if (!nal_data) {
                CDEBUG(D_ERROR, "no nal_data\n");
-               return(GMNAL_STATUS_FAIL);
+               return(PTL_FAIL);
        } else {
                CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
        }
@@ -420,19 +398,17 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
        GMNAL_GM_UNLOCK(nal_data);
        if (gm_status != GM_SUCCESS) {
                CDEBUG(D_ERROR, "Failed to obtain local id\n");
-               return(GMNAL_STATUS_FAIL);
+               return(PTL_FAIL);
        }
        CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
 
-       stxd = gmnal_get_stxd(nal_data, 1);
-       CDEBUG(D_INFO, "stxd [%p]\n", stxd);
-
        stxd->type = GMNAL_SMALL_MESSAGE;
        stxd->cookie = cookie;
 
        /*
         *      Copy gmnal_msg_hdr and portals header to the transmit buffer
-        *      Then copy the data in
+        *      Then send the message, as the data has previously been copied in
+        *      (HP SFS 1380).
         */
        buffer = stxd->buffer;
        msghdr = (gmnal_msghdr_t*)buffer;
@@ -449,24 +425,6 @@ gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
 
        buffer += sizeof(ptl_hdr_t);
 
-       while(niov--) {
-                if (offset >= iov->iov_len) {
-                        offset -= iov->iov_len;
-                } else if (offset > 0) {
-                       CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n", 
-                               iov, iov->iov_base + offset, iov->iov_len - offset, buffer);
-                       gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset);
-                       buffer+= iov->iov_len - offset;
-                        offset = 0;
-                } else {
-                       CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n", 
-                               iov, iov->iov_len, buffer);
-                       gm_bcopy(iov->iov_base, buffer, iov->iov_len);
-                       buffer+= iov->iov_len;
-                } 
-                iov++;
-       }
-
        CDEBUG(D_INFO, "sending\n");
        tot_size = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t);
        stxd->msg_size = tot_size;
@@ -505,14 +463,25 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
        lib_msg_t       *cookie = stxd->cookie;
        gmnal_data_t    *nal_data = (gmnal_data_t*)stxd->nal_data;
        lib_nal_t       *libnal = nal_data->libnal;
+       unsigned         gnid = 0;
+       gm_status_t      gm_status = 0;
 
        if (!stxd) {
                CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
                return;
        }
        if (status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n", 
-                      stxd, gmnal_gm_error(status));
+               GMNAL_GM_LOCK(nal_data);
+               gm_status = gm_node_id_to_global_id(nal_data->gm_port,
+                                                   stxd->gm_target_node, &gnid);
+               GMNAL_GM_UNLOCK(nal_data);
+               if (gm_status != GM_SUCCESS) {
+                       CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n",
+                              gm_status);
+                       gnid = 0;
+               }
+               CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s] to [%u]\n", 
+                      stxd, gmnal_gm_error(status), gnid);
        }
 
        switch(status) {
@@ -547,7 +516,7 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                        CDEBUG(D_INFO, "calling gm_drop_sends\n");
                        GMNAL_GM_LOCK(nal_data);
                        gm_drop_sends(nal_data->gm_port, stxd->gm_priority, 
-                                     stxd->gm_target_node, GMNAL_GM_PORT, 
+                                     stxd->gm_target_node, GMNAL_GM_PORT_ID
                                      gmnal_drop_sends_callback, context);
                        GMNAL_GM_UNLOCK(nal_data);
 
@@ -600,9 +569,8 @@ gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
                case(GM_FIRMWARE_NOT_RUNNING):
                case(GM_YP_NO_MATCH):
                default:
-                       CDEBUG(D_ERROR, "Unknown send error\n");
                 gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
-                                      stxd->gm_target_node, GMNAL_GM_PORT,
+                                      stxd->gm_target_node, GMNAL_GM_PORT_ID,
                                       gmnal_resume_sending_callback, context);
                 return;
 
@@ -658,7 +626,7 @@ void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
                                              stxd->gm_target_node, 
                                              gmnal_small_tx_callback, 
                                              context);
-               GMNAL_GM_LOCK(nal_data);
+               GMNAL_GM_UNLOCK(nal_data);
        } else {
                CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
                       "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
@@ -730,7 +698,7 @@ gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
        msghdr->magic = GMNAL_MAGIC;
        msghdr->type = GMNAL_LARGE_MESSAGE_INIT;
        msghdr->sender_node_id = nal_data->gm_global_nid;
-       msghdr->stxd = stxd;
+       msghdr->stxd_remote_ptr = (gm_remote_ptr_t)stxd;
        msghdr->niov = niov ;
        buffer += sizeof(gmnal_msghdr_t);
        mlen = sizeof(gmnal_msghdr_t);
@@ -884,7 +852,7 @@ gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
         *      The gmnal_large_message_ack needs it to notify the sender
         *      the pull of data is complete
         */
-       srxd->source_stxd = msghdr->stxd;
+       srxd->source_stxd = (gmnal_stxd_t*)msghdr->stxd_remote_ptr;
 
        /*
         *      Register the receivers memory
@@ -1003,7 +971,7 @@ gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
 /*
  *     pull data from source node (source iovec) to a local iovec.
  *     The iovecs may not match which adds the complications below.
- *     Count the number of gm_gets that will be required to the callbacks
+ *     Count the number of gm_gets that will be required so the callbacks
  *     can determine who is the last one.
  */    
 int
@@ -1067,7 +1035,7 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
                                gm_get(nal_data->gm_port, remote_ptr, rbuf, 
                                       rlen, GM_LOW_PRIORITY, source_node, 
-                                      GMNAL_GM_PORT, 
+                                      GMNAL_GM_PORT_ID
                                       gmnal_remote_get_callback, ltxd);
                                GMNAL_GM_UNLOCK(nal_data);
                        }
@@ -1091,7 +1059,7 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
                                gm_get(nal_data->gm_port, remote_ptr, rbuf, 
                                       slen, GM_LOW_PRIORITY, source_node, 
-                                      GMNAL_GM_PORT, 
+                                      GMNAL_GM_PORT_ID
                                       gmnal_remote_get_callback, ltxd);
                                GMNAL_GM_UNLOCK(nal_data);
                        }
@@ -1114,7 +1082,7 @@ gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
                                gm_get(nal_data->gm_port, remote_ptr, rbuf, 
                                       rlen, GM_LOW_PRIORITY, source_node, 
-                                      GMNAL_GM_PORT, 
+                                      GMNAL_GM_PORT_ID
                                       gmnal_remote_get_callback, ltxd);
                                GMNAL_GM_UNLOCK(nal_data);
                        }
@@ -1267,7 +1235,7 @@ gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
        msghdr->magic = GMNAL_MAGIC;
        msghdr->type = GMNAL_LARGE_MESSAGE_ACK;
        msghdr->sender_node_id = nal_data->gm_global_nid;
-       msghdr->stxd = srxd->source_stxd;
+       msghdr->stxd_remote_ptr = (gm_remote_ptr_t)srxd->source_stxd;
        CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer);
 
        CDEBUG(D_INFO, "sending\n");
@@ -1342,7 +1310,7 @@ gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
 
        buffer = srxd->buffer;
        msghdr = (gmnal_msghdr_t*)buffer;
-       stxd = msghdr->stxd;
+       stxd = (gmnal_stxd_t*)msghdr->stxd_remote_ptr;
 
        CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
 
index 3aca90f..9fd2182 100644 (file)
@@ -30,7 +30,7 @@ int gmnal_small_msg_size = 525312;
  */
 int num_rx_threads = -1;
 int num_stxds = 5;
-int gm_port = 4;
+int gm_port_id = 4;
 
 int 
 gmnal_cmd(struct portals_cfg *pcfg, void *private)
@@ -125,7 +125,7 @@ module_exit(gmnal_unload);
 MODULE_PARM(gmnal_small_msg_size, "i");
 MODULE_PARM(num_rx_threads, "i");
 MODULE_PARM(num_stxds, "i");
-MODULE_PARM(gm_port, "i");
+MODULE_PARM(gm_port_id, "i");
 
 MODULE_AUTHOR("Morgan Doyle");
 
index 6a52319..d625019 100644 (file)
@@ -964,7 +964,7 @@ gmnal_get_rxtwe(gmnal_data_t *nal_data)
        CDEBUG(D_NET, "Getting entry to list\n");
 
        do  {
-               down(&nal_data->rxtwe_wait);
+               while(down_interruptible(&nal_data->rxtwe_wait) != 0);
                if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP) {
                        /*
                         *      time to stop
index 6a119c0..c4dbe25 100644 (file)
 #include <portals/api-support.h>
 #include <portals/lib-types.h>
 
+#include <gm.h>
+
 #define GMNAL_IOC_GET_GNID 1
+/*
+ *      portals always uses unit 0
+ *      Can this be configurable?
+ */
+#define GM_UNIT 0
+
+/*
+ * prototypes
+ */
+unsigned u_getgmnid(char *name, int get_local_id);
+void usage(char *prg, int h);
 
 int main(int argc, char **argv)
 {
-        int rc, pfd;
-        struct portal_ioctl_data data;
-        struct portals_cfg pcfg;
-       unsigned int    nid = 0, len;
-       char    *name = NULL;
-       int     c;
-
-
-
-       while ((c = getopt(argc, argv, "n:l")) != -1) {
-               switch(c) {
-               case('n'):
-                       name = optarg;  
-               break;
-               case('l'):
-                       printf("Get local id not implemented yet!\n");
-                       exit(-1);
-               default:
-                       printf("usage %s -n nodename [-p]\n", argv[0]);
-               }
-       }
-
-       if (!name) {
-               printf("usage %s -n nodename [-p]\n", argv[0]);
-               exit(-1);
-       }
-
-
-
-
-        PCFG_INIT(pcfg, GMNAL_IOC_GET_GNID);
-        pcfg.pcfg_nal = GMNAL;
-
-       /*
-        *      set up the inputs
-        */
-       len = strlen(name) + 1;
-       pcfg.pcfg_pbuf1 = malloc(len);
-       strcpy(pcfg.pcfg_pbuf1, name);
-       pcfg.pcfg_plen1 = len;
-
-       /*
-        *      set up the outputs
-        */
-       pcfg.pcfg_pbuf2 = (void*)&nid;
-       pcfg.pcfg_plen2 = sizeof(unsigned int*);
-
-        pfd = open("/dev/portals", O_RDWR);
-        if ( pfd < 0 ) {
-                perror("opening portals device");
-               free(pcfg.pcfg_pbuf1);
+    /*
+     * Parse the command line, look up the requested GM global node id
+     * via u_getgmnid() and print it on stdout.
+     *
+     *   -n hostname   look up the id of the named host
+     *   -l            look up the id of the local host
+     *   -h            print the extended help text
+     */
+    unsigned int        nid = 0;
+    char               *name = NULL;
+    int                        c;
+    int                        get_local_id = 0;
+
+
+
+    while ((c = getopt(argc, argv, "n:lh")) != -1) {
+        switch(c) {
+            /*
+             * -n and -l are mutually exclusive: whichever comes second
+             * on the command line is rejected with a usage message
+             */
+            case('n'):
+                if (get_local_id) {
+                    usage(argv[0], 0);
+                    exit(-1);
+                }
+            name = optarg;     
+            break;
+            case('h'):
+                usage(argv[0], 1);
                 exit(-1);
-        }
+            break;
+            case('l'):
+                if (name) {
+                    usage(argv[0], 0);
+                    exit(-1);
+                }
+                get_local_id = 1;
+            break;
+            default:
+                usage(argv[0], 0);
+                exit(-1);
+            }
+    }
+
+    /*
+     * One of -n or -l is required
+     */
+    if (!name && !get_local_id) {
+        usage(argv[0], 0);
+        exit(-1);
+    }
+
+    /*
+     * NOTE(review): u_getgmnid() returns 0 on every failure path, and
+     * that 0 is still printed here followed by a successful exit -
+     * confirm callers do not rely on the exit status to detect errors
+     */
+    nid = u_getgmnid(name, get_local_id);
+    printf("%u\n", nid);
+    exit(0);
+}
 
-        PORTAL_IOC_INIT(data);
-        data.ioc_pbuf1 = (char*)&pcfg;
-        data.ioc_plen1 = sizeof(pcfg);
-                
-        rc = ioctl (pfd, IOC_PORTAL_NAL_CMD, &data);
-        if (rc < 0)
-        {
-               perror ("Can't get my NID");
+/*
+ *      Resolve a Myrinet (GM) global node id.
+ *
+ *      name          host name to look up; ignored when get_local_id is set
+ *      get_local_id  non-zero to report the id of the local host
+ *
+ *      Returns the global node id, or 0 when any GM call fails; the
+ *      failing call and its gm_strerror() text are written to stderr.
+ */
+unsigned
+u_getgmnid(char *name, int get_local_id)
+{
+    struct gm_port     *gm_port;
+    int                gm_port_id = 2;
+    gm_status_t     gm_status = GM_SUCCESS;
+
+    /*
+     * gm global or local ids are never 0
+     */
+    unsigned   global_nid = 0, local_nid = 0;
+
+    gm_status = gm_init();
+    if (gm_status != GM_SUCCESS) {
+        fprintf(stderr, "gm_init :: %s\n", gm_strerror(gm_status));
+        return(0);
+    }
+
+    gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id,
+                        "gmnalnid", GM_API_VERSION);
+
+    if (gm_status != GM_SUCCESS) {
+        /*
+         *     Couldn't open port 2
+         *     try 4 5 6 7, stopping at the first one that opens
+         */
+        for (gm_port_id=4; gm_port_id<8; gm_port_id++) {
+            gm_status = gm_open(&gm_port,
+                                GM_UNIT,
+                                gm_port_id,
+                                "gmnalnid",
+                                GM_API_VERSION);
+            if (gm_status == GM_SUCCESS) {
+                break;
+            }
+        }
+
+        /*
+         *     Only give up once every candidate port has been tried;
+         *     the status reported is that of the last attempt
+         */
+        if (gm_status != GM_SUCCESS) {
+            fprintf(stderr, "gm_open :: %s\n",
+                    gm_strerror(gm_status));
+            gm_finalize();
+            return(0);
+        }
+    }
+
+    if (get_local_id) {
+        /*
+         * NOTE(review): the local node id is hard-coded to 1 rather
+         * than queried from GM - confirm this holds on all setups
+         */
+        local_nid = 1;
+    } else {
+        gm_status = gm_host_name_to_node_id_ex(gm_port, 1000000, name,
+                                               &local_nid);
+        if (gm_status != GM_SUCCESS) {
+            fprintf(stderr, "gm_host_name_to_node_id_ex :: %s\n",
+                    gm_strerror(gm_status));
+            gm_close(gm_port);
+            gm_finalize();
+            return(0);
         }
-                        
-       free(pcfg.pcfg_pbuf1);
-       close(pfd);
-       printf("%u\n", nid);
-        exit(0);
+    }
+
+    gm_status = gm_node_id_to_global_id(gm_port, local_nid, &global_nid);
+    if (gm_status != GM_SUCCESS) {
+        fprintf(stderr, "gm_node_id_to_global_id :: %s\n",
+                gm_strerror(gm_status));
+        gm_close(gm_port);
+        gm_finalize();
+        return(0);
+    }
+
+    /*
+     * Success: release the port and the library before returning
+     */
+    gm_close(gm_port);
+    gm_finalize();
+    return(global_nid);
+}
+
+/*
+ *      Print the command-line synopsis for prg on stderr.  When h is
+ *      non-zero the extended help text follows on stdout.
+ */
+void
+usage(char *prg, int h)
+{
+    fprintf(stderr, "usage %s -n hostname | -l | -h\n", prg);
+
+    /* Synopsis only unless help was asked for */
+    if (!h)
+        return;
+
+    fputs("\nGet Myrinet Global network ids for specified host\n", stdout);
+    fputs("-l gets network id for local host\n", stdout);
 }