extern int gmnal_small_msg_size;
extern int num_rx_threads;
extern int num_stxds;
-extern int gm_port;
+extern int gm_port_id;
#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size
#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c)
#define GMNAL_MAGIC 0x1234abcd
/*
* The gm_port to use for gmnal
*/
-#define GMNAL_GM_PORT gm_port
+#define GMNAL_GM_PORT_ID gm_port_id
/*
/*
* Header which lmgnal puts at the start of each message
+ * watch alignment for ia32/64 interaction
*/
typedef struct _gmnal_msghdr {
int magic;
int type;
unsigned int sender_node_id;
- gmnal_stxd_t *stxd;
int niov;
+ gm_remote_ptr_t stxd_remote_ptr; /* 64 bits; carries the sender's stxd pointer, which is cast back to gmnal_stxd_t* when the header is read */
} gmnal_msghdr_t;
#define GMNAL_MSGHDR_SIZE sizeof(gmnal_msghdr_t)
#define NRXTHREADS 10 /* max number of receiver threads */
typedef struct _gmnal_data_t {
+ int refcnt;
+ spinlock_t cb_lock;
spinlock_t stxd_lock;
struct semaphore stxd_token;
gmnal_stxd_t *stxd;
#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock);
#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock);
#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock);
+#define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock);
/*
gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*);
void gmnal_stop_rxthread(gmnal_data_t *);
void gmnal_stop_ctthread(gmnal_data_t *);
-void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
char *gmnal_gm_error(gm_status_t);
/*
* Small messages
*/
-int gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t, size_t);
-int gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
+ptl_err_t gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *);
+ptl_err_t gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t,
- unsigned int, struct iovec*, size_t, int);
+ gmnal_stxd_t*, int);
void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
CDEBUG(D_NET, "Calling gm_open with port [%d], "
- "name [%s], version [%d]\n", GMNAL_GM_PORT,
+ "name [%s], version [%d]\n", GMNAL_GM_PORT_ID,
"gmnal", GM_API_VERSION);
GMNAL_GM_LOCK(nal_data);
- gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal",
+ gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT_ID, "gmnal",
GM_API_VERSION);
GMNAL_GM_UNLOCK(nal_data);
unsigned int niov, struct iovec *iov, size_t offset,
size_t mlen, size_t rlen)
{
+ void *buffer = NULL;
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
int status = PTL_OK;
-
CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
"niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n",
libnal, private, cookie, niov, iov, offset, mlen, rlen);
switch(srxd->type) {
case(GMNAL_SMALL_MESSAGE):
CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
- status = gmnal_small_rx(libnal, private, cookie, niov,
- iov, offset, mlen, rlen);
+ /* HP SFS 1380: Proactively change receives to avoid a receive
+ * side occurrence of filling pkmap_count[].
+ */
+ buffer = srxd->buffer;
+ buffer += sizeof(gmnal_msghdr_t);
+ buffer += sizeof(ptl_hdr_t);
+
+ while(niov--) {
+ if (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ } else if (offset > 0) {
+ CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
+ "offset %d, len ["LPSZ"]\n", iov,
+ iov->iov_base + offset, iov->iov_len, offset,
+ iov->iov_len - offset);
+ gm_bcopy(buffer, iov->iov_base + offset,
+ iov->iov_len - offset);
+ buffer += iov->iov_len - offset;
+ offset = 0;
+ } else {
+ CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
+ iov->iov_len);
+ gm_bcopy(buffer, iov->iov_base, iov->iov_len);
+ buffer += iov->iov_len;
+ }
+ iov++;
+ }
+ status = gmnal_small_rx(libnal, private, cookie);
break;
case(GMNAL_LARGE_MESSAGE_INIT):
CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
{
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
int status = PTL_OK;
- struct iovec *iovec = NULL, *iovec_dup = NULL;
- int i = 0;
- ptl_kiov_t *kiov_dup = kiov;;
+ char *ptr = NULL;
+ void *buffer = NULL;
CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
libnal, private, cookie, kniov, kiov, offset, mlen, rlen);
if (srxd->type == GMNAL_SMALL_MESSAGE) {
- PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
- if (!iovec) {
- CDEBUG(D_ERROR, "Can't malloc\n");
- return(GMNAL_STATUS_FAIL);
- }
- iovec_dup = iovec;
+ buffer = srxd->buffer;
+ buffer += sizeof(gmnal_msghdr_t);
+ buffer += sizeof(ptl_hdr_t);
/*
* map each page and create an iovec for it
*/
- for (i=0; i<kniov; i++) {
- CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
- CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
- kiov->kiov_page, kiov->kiov_len,
- kiov->kiov_offset);
- iovec->iov_len = kiov->kiov_len;
- CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page);
+ while (kniov--) {
+ /* HP SFS 1380: Proactively change receives to avoid a receive
+ * side occurrence of filling pkmap_count[].
+ */
+ CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", kniov, kiov);
- iovec->iov_base = kmap(kiov->kiov_page) +
- kiov->kiov_offset;
+ if (offset >= kiov->kiov_len) {
+ offset -= kiov->kiov_len;
+ } else {
+ CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
+ kiov->kiov_page, kiov->kiov_len,
+ kiov->kiov_offset);
+ CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page);
+ ptr = ((char *)kmap(kiov->kiov_page)) + kiov->kiov_offset;
- CDEBUG(D_INFO, "iov_base is [%p]\n", iovec->iov_base);
- iovec++;
+ if (offset > 0) {
+ CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
+ "offset %d, len ["LPSZ"]\n", ptr,
+ ptr + offset, kiov->kiov_len, offset,
+ kiov->kiov_len - offset);
+ gm_bcopy(buffer, ptr + offset,
+ kiov->kiov_len - offset);
+ buffer += kiov->kiov_len - offset;
+ offset = 0;
+ } else {
+ CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", ptr,
+ kiov->kiov_len);
+ gm_bcopy(buffer, ptr, kiov->kiov_len);
+ buffer += kiov->kiov_len;
+ }
+ kunmap(kiov->kiov_page);
+ CDEBUG(D_INFO, "Stored in [%p]\n", ptr);
+ }
kiov++;
}
CDEBUG(D_INFO, "calling gmnal_small_rx\n");
- status = gmnal_small_rx(libnal, private, cookie, kniov,
- iovec_dup, offset, mlen, rlen);
- for (i=0; i<kniov; i++) {
- kunmap(kiov_dup->kiov_page);
- kiov_dup++;
- }
- PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
+ status = gmnal_small_rx(libnal, private, cookie);
}
{
gmnal_data_t *nal_data;
+ void *buffer = NULL;
+ gmnal_stxd_t *stxd = NULL;
CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ"] nid["LPU64"]\n",
niov, offset, len, nid);
nal_data = libnal->libnal_data;
+ if (!nal_data) {
+ CDEBUG(D_ERROR, "no nal_data\n");
+ return(PTL_FAIL);
+ } else {
+ CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
+ }
if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
CDEBUG(D_INFO, "This is a small message send\n");
+ /*
+ * HP SFS 1380: With the change to gmnal_small_tx, need to get the stxd
+ * and do relevant setup here
+ */
+ stxd = gmnal_get_stxd(nal_data, 1);
+ CDEBUG(D_INFO, "stxd [%p]\n", stxd);
+ /* Set the offset of the data to copy into the buffer */
+ buffer = stxd->buffer + sizeof(gmnal_msghdr_t) + sizeof(ptl_hdr_t);
+ while(niov--) {
+ if (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ } else if (offset > 0) {
+ CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n",
+ iov, iov->iov_base + offset, iov->iov_len - offset, buffer);
+ gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset);
+ buffer+= iov->iov_len - offset;
+ offset = 0;
+ } else {
+ CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
+ iov, iov->iov_len, buffer);
+ gm_bcopy(iov->iov_base, buffer, iov->iov_len);
+ buffer+= iov->iov_len;
+ }
+ iov++;
+ }
gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
- niov, iov, offset, len);
+ stxd, len);
} else {
- CDEBUG(D_ERROR, "Large message send it is not supported\n");
+ CDEBUG(D_ERROR, "Large message send is not supported\n");
lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid,
unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len)
{
- int i = 0;
gmnal_data_t *nal_data;
- struct iovec *iovec = NULL, *iovec_dup = NULL;
- ptl_kiov_t *kiov_dup = kiov;
+ char *ptr;
+ void *buffer = NULL;
+ gmnal_stxd_t *stxd = NULL;
+ ptl_err_t status = PTL_OK;
CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset["LPSZ"] len["LPSZ"]\n",
nid, kniov, offset, len);
nal_data = libnal->libnal_data;
- PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
- iovec_dup = iovec;
+ if (!nal_data) {
+ CDEBUG(D_ERROR, "no nal_data\n");
+ return(PTL_FAIL);
+ } else {
+ CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
+ }
+
+ /* HP SFS 1380: Need to do the gm_bcopy after the kmap so we can kunmap
+ * more aggressively. This is the fix for a livelock situation under load
+ * on ia32 that occurs when there are no more available entries in the
+ * pkmap_count array. Just fill the buffer and let gmnal_small_tx
+ * put the headers in after we pass it the stxd pointer.
+ */
+ stxd = gmnal_get_stxd(nal_data, 1);
+ CDEBUG(D_INFO, "stxd [%p]\n", stxd);
+ /* Set the offset of the data to copy into the buffer */
+ buffer = stxd->buffer + sizeof(gmnal_msghdr_t) + sizeof(ptl_hdr_t);
+
if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
CDEBUG(D_INFO, "This is a small message send\n");
- for (i=0; i<kniov; i++) {
- CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
- CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
- kiov->kiov_page, kiov->kiov_len,
- kiov->kiov_offset);
+ while(kniov--) {
+ CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", kniov, kiov);
+ if (offset >= kiov->kiov_len) {
+ offset -= kiov->kiov_len;
+ } else {
+ CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
+ kiov->kiov_page, kiov->kiov_len,
+ kiov->kiov_offset);
- iovec->iov_base = kmap(kiov->kiov_page)
- + kiov->kiov_offset;
+ ptr = ((char *)kmap(kiov->kiov_page)) + kiov->kiov_offset;
- iovec->iov_len = kiov->kiov_len;
- iovec++;
+ if (offset > 0) {
+ CDEBUG(D_INFO, "processing [%p] base [%p] len ["LPSZ"] to [%p]\n",
+ ptr, ptr + offset, kiov->kiov_len - offset, buffer);
+ gm_bcopy(ptr + offset, buffer, kiov->kiov_len - offset);
+ buffer+= kiov->kiov_len - offset;
+ offset = 0;
+ } else {
+ CDEBUG(D_INFO, "processing kmapped [%p] len ["LPSZ"] to [%p]\n",
+ ptr, kiov->kiov_len, buffer);
+ gm_bcopy(ptr, buffer, kiov->kiov_len);
+
+ buffer += kiov->kiov_len;
+ }
+ kunmap(kiov->kiov_page);
+ }
kiov++;
}
- gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
- pid, kniov, iovec_dup, offset, len);
+ status = gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
+ pid, stxd, len);
} else {
+ int i = 0;
+ struct iovec *iovec = NULL, *iovec_dup = NULL;
+ ptl_kiov_t *kiov_dup = kiov;
+
+ PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
+ iovec_dup = iovec;
CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
+ PORTAL_FREE(iovec, kniov*sizeof(struct iovec));
return(PTL_FAIL);
for (i=0; i<kniov; i++) {
CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
}
gmnal_large_tx(libnal, private, cookie, hdr, type, nid,
pid, kniov, iovec, offset, len);
+ for (i=0; i<kniov; i++) {
+ kunmap(kiov_dup->kiov_page);
+ kiov_dup++;
+ }
+ PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
}
- for (i=0; i<kniov; i++) {
- kunmap(kiov_dup->kiov_page);
- kiov_dup++;
- }
- PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
- return(PTL_OK);
+ return(status);
}
int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
nal_data = (gmnal_data_t*)arg;
CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
+ sprintf(current->comm, "gmnal_ct");
+
daemonize();
nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED;
gmnal_data_t *nal_data;
void *buffer;
gmnal_rxtwe_t *we = NULL;
+ int rank;
if (!arg) {
CDEBUG(D_TRACE, "NO nal_data. Exiting\n");
nal_data = (gmnal_data_t*)arg;
CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
+ for (rank=0; rank<num_rx_threads; rank++)
+ if (nal_data->rxthread_pid[rank] == current->pid)
+ break;
+
+ sprintf(current->comm, "gmnal_rx_%d", rank);
+
daemonize();
/*
* set 1 bit for each thread started
* Hang out the receive buffer again for another receive
* Call lib_finalize
*/
-int
-gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen)
+ptl_err_t
+gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie)
{
gmnal_srxd_t *srxd = NULL;
- void *buffer = NULL;
gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
- CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
-
if (!private) {
CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
lib_finalize(libnal, private, cookie, PTL_FAIL);
}
srxd = (gmnal_srxd_t*)private;
- buffer = srxd->buffer;
- buffer += sizeof(gmnal_msghdr_t);
- buffer += sizeof(ptl_hdr_t);
-
- while(niov--) {
- if (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- } else if (offset > 0) {
- CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
- "offset %d, len ["LPSZ"]\n", iov,
- iov->iov_base + offset, iov->iov_len, offset,
- iov->iov_len - offset);
- gm_bcopy(buffer, iov->iov_base + offset,
- iov->iov_len - offset);
- offset = 0;
- buffer += iov->iov_len - offset;
- } else {
- CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
- iov->iov_len);
- gm_bcopy(buffer, iov->iov_base, iov->iov_len);
- buffer += iov->iov_len;
- }
- iov++;
- }
-
/*
* let portals library know receive is complete
/*
* Start a small transmit.
- * Get a send token (and wired transmit buffer).
- * Copy data from senders buffer to wired buffer and
- * initiate gm_send from the wired buffer.
+ * Use the given send token (and wired transmit buffer).
+ * Copy headers to wired buffer and initiate gm_send from the wired buffer.
* The callback function informs when the send is complete.
*/
-int
+ptl_err_t
gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, size_t offset, int size)
+ gmnal_stxd_t *stxd, int size)
{
gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
- gmnal_stxd_t *stxd = NULL;
void *buffer = NULL;
gmnal_msghdr_t *msghdr = NULL;
int tot_size = 0;
gm_status_t gm_status = GM_SUCCESS;
CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
- "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
- "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type,
- global_nid, pid, niov, iov, size);
+ "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] stxd [%p] "
+ "size [%d]\n", libnal, private, cookie, hdr, type,
+ global_nid, pid, stxd, size);
CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
hdr->dest_nid, hdr->src_nid);
if (!nal_data) {
CDEBUG(D_ERROR, "no nal_data\n");
- return(GMNAL_STATUS_FAIL);
+ return(PTL_FAIL);
} else {
CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
}
GMNAL_GM_UNLOCK(nal_data);
if (gm_status != GM_SUCCESS) {
CDEBUG(D_ERROR, "Failed to obtain local id\n");
- return(GMNAL_STATUS_FAIL);
+ return(PTL_FAIL);
}
CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
- stxd = gmnal_get_stxd(nal_data, 1);
- CDEBUG(D_INFO, "stxd [%p]\n", stxd);
-
stxd->type = GMNAL_SMALL_MESSAGE;
stxd->cookie = cookie;
/*
* Copy gmnal_msg_hdr and portals header to the transmit buffer
- * Then copy the data in
+ * Then send the message, as the data has previously been copied in
+ * (HP SFS 1380).
*/
buffer = stxd->buffer;
msghdr = (gmnal_msghdr_t*)buffer;
buffer += sizeof(ptl_hdr_t);
- while(niov--) {
- if (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- } else if (offset > 0) {
- CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n",
- iov, iov->iov_base + offset, iov->iov_len - offset, buffer);
- gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset);
- buffer+= iov->iov_len - offset;
- offset = 0;
- } else {
- CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
- iov, iov->iov_len, buffer);
- gm_bcopy(iov->iov_base, buffer, iov->iov_len);
- buffer+= iov->iov_len;
- }
- iov++;
- }
-
CDEBUG(D_INFO, "sending\n");
tot_size = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t);
stxd->msg_size = tot_size;
lib_msg_t *cookie = stxd->cookie;
gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data;
lib_nal_t *libnal = nal_data->libnal;
+ unsigned gnid = 0;
+ gm_status_t gm_status = 0;
if (!stxd) {
CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
return;
}
if (status != GM_SUCCESS) {
- CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n",
- stxd, gmnal_gm_error(status));
+ GMNAL_GM_LOCK(nal_data);
+ gm_status = gm_node_id_to_global_id(nal_data->gm_port,
+ stxd->gm_target_node, &gnid);
+ GMNAL_GM_UNLOCK(nal_data);
+ if (gm_status != GM_SUCCESS) {
+ CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n",
+ gm_status);
+ gnid = 0;
+ }
+ CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s] to [%u]\n",
+ stxd, gmnal_gm_error(status), gnid);
}
switch(status) {
CDEBUG(D_INFO, "calling gm_drop_sends\n");
GMNAL_GM_LOCK(nal_data);
gm_drop_sends(nal_data->gm_port, stxd->gm_priority,
- stxd->gm_target_node, GMNAL_GM_PORT,
+ stxd->gm_target_node, GMNAL_GM_PORT_ID,
gmnal_drop_sends_callback, context);
GMNAL_GM_UNLOCK(nal_data);
case(GM_FIRMWARE_NOT_RUNNING):
case(GM_YP_NO_MATCH):
default:
- CDEBUG(D_ERROR, "Unknown send error\n");
gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
- stxd->gm_target_node, GMNAL_GM_PORT,
+ stxd->gm_target_node, GMNAL_GM_PORT_ID,
gmnal_resume_sending_callback, context);
return;
stxd->gm_target_node,
gmnal_small_tx_callback,
context);
- GMNAL_GM_LOCK(nal_data);
+ GMNAL_GM_UNLOCK(nal_data);
} else {
CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
"[%d][%s]\n", stxd, status, gmnal_gm_error(status));
msghdr->magic = GMNAL_MAGIC;
msghdr->type = GMNAL_LARGE_MESSAGE_INIT;
msghdr->sender_node_id = nal_data->gm_global_nid;
- msghdr->stxd = stxd;
+ msghdr->stxd_remote_ptr = (gm_remote_ptr_t)stxd;
msghdr->niov = niov ;
buffer += sizeof(gmnal_msghdr_t);
mlen = sizeof(gmnal_msghdr_t);
* The gmnal_large_message_ack needs it to notify the sender
* the pull of data is complete
*/
- srxd->source_stxd = msghdr->stxd;
+ srxd->source_stxd = (gmnal_stxd_t*)msghdr->stxd_remote_ptr;
/*
* Register the receivers memory
/*
* pull data from source node (source iovec) to a local iovec.
* The iovecs may not match which adds the complications below.
- * Count the number of gm_gets that will be required to the callbacks
+ * Count the number of gm_gets that will be required so the callbacks
* can determine who is the last one.
*/
int
remote_ptr = (gm_remote_ptr_t)sbuf_long;
gm_get(nal_data->gm_port, remote_ptr, rbuf,
rlen, GM_LOW_PRIORITY, source_node,
- GMNAL_GM_PORT,
+ GMNAL_GM_PORT_ID,
gmnal_remote_get_callback, ltxd);
GMNAL_GM_UNLOCK(nal_data);
}
remote_ptr = (gm_remote_ptr_t)sbuf_long;
gm_get(nal_data->gm_port, remote_ptr, rbuf,
slen, GM_LOW_PRIORITY, source_node,
- GMNAL_GM_PORT,
+ GMNAL_GM_PORT_ID,
gmnal_remote_get_callback, ltxd);
GMNAL_GM_UNLOCK(nal_data);
}
remote_ptr = (gm_remote_ptr_t)sbuf_long;
gm_get(nal_data->gm_port, remote_ptr, rbuf,
rlen, GM_LOW_PRIORITY, source_node,
- GMNAL_GM_PORT,
+ GMNAL_GM_PORT_ID,
gmnal_remote_get_callback, ltxd);
GMNAL_GM_UNLOCK(nal_data);
}
msghdr->magic = GMNAL_MAGIC;
msghdr->type = GMNAL_LARGE_MESSAGE_ACK;
msghdr->sender_node_id = nal_data->gm_global_nid;
- msghdr->stxd = srxd->source_stxd;
+ msghdr->stxd_remote_ptr = (gm_remote_ptr_t)srxd->source_stxd;
CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer);
CDEBUG(D_INFO, "sending\n");
buffer = srxd->buffer;
msghdr = (gmnal_msghdr_t*)buffer;
- stxd = msghdr->stxd;
+ stxd = (gmnal_stxd_t*)msghdr->stxd_remote_ptr;
CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
*/
int num_rx_threads = -1;
int num_stxds = 5;
-int gm_port = 4;
+int gm_port_id = 4;
int
gmnal_cmd(struct portals_cfg *pcfg, void *private)
MODULE_PARM(gmnal_small_msg_size, "i");
MODULE_PARM(num_rx_threads, "i");
MODULE_PARM(num_stxds, "i");
-MODULE_PARM(gm_port, "i");
+MODULE_PARM(gm_port_id, "i");
MODULE_AUTHOR("Morgan Doyle");
CDEBUG(D_NET, "Getting entry to list\n");
do {
- down(&nal_data->rxtwe_wait);
+ while(down_interruptible(&nal_data->rxtwe_wait) != 0);
if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP) {
/*
* time to stop
#include <portals/api-support.h>
#include <portals/lib-types.h>
+#include <gm.h>
+
#define GMNAL_IOC_GET_GNID 1
+/*
+ * portals always uses unit 0
+ * Can this be configurable?
+ */
+#define GM_UNIT 0
+
+/*
+ * prototypes
+ */
+unsigned u_getgmnid(char *name, int get_local_id);
+void usage(char *prg, int h);
int main(int argc, char **argv)
{
- int rc, pfd;
- struct portal_ioctl_data data;
- struct portals_cfg pcfg;
- unsigned int nid = 0, len;
- char *name = NULL;
- int c;
-
-
-
- while ((c = getopt(argc, argv, "n:l")) != -1) {
- switch(c) {
- case('n'):
- name = optarg;
- break;
- case('l'):
- printf("Get local id not implemented yet!\n");
- exit(-1);
- default:
- printf("usage %s -n nodename [-p]\n", argv[0]);
- }
- }
-
- if (!name) {
- printf("usage %s -n nodename [-p]\n", argv[0]);
- exit(-1);
- }
-
-
-
-
- PCFG_INIT(pcfg, GMNAL_IOC_GET_GNID);
- pcfg.pcfg_nal = GMNAL;
-
- /*
- * set up the inputs
- */
- len = strlen(name) + 1;
- pcfg.pcfg_pbuf1 = malloc(len);
- strcpy(pcfg.pcfg_pbuf1, name);
- pcfg.pcfg_plen1 = len;
-
- /*
- * set up the outputs
- */
- pcfg.pcfg_pbuf2 = (void*)&nid;
- pcfg.pcfg_plen2 = sizeof(unsigned int*);
-
- pfd = open("/dev/portals", O_RDWR);
- if ( pfd < 0 ) {
- perror("opening portals device");
- free(pcfg.pcfg_pbuf1);
+ unsigned int nid = 0; /* value returned by u_getgmnid(), printed at the end */
+ char *name = NULL; /* host name given with -n */
+ int c;
+ int get_local_id = 0; /* set by -l */
+
+ /* Print the global node id of the host named with -n, or of the local host with -l. */
+ /* -n and -l are mutually exclusive; every usage() path below, including -h, exits with -1. */
+ while ((c = getopt(argc, argv, "n:lh")) != -1) {
+ switch(c) {
+ case('n'):
+ if (get_local_id) { /* -l was already seen */
+ usage(argv[0], 0);
+ exit(-1);
+ }
+ name = optarg;
+ break;
+ case('h'):
+ usage(argv[0], 1); /* h=1 adds the help text to the synopsis */
 exit(-1);
- }
+ break;
+ case('l'):
+ if (name) { /* -n was already seen */
+ usage(argv[0], 0);
+ exit(-1);
+ }
+ get_local_id = 1;
+ break;
+ default:
+ usage(argv[0], 0);
+ exit(-1);
+ }
+ }
+ /* One of -n or -l is required. */
+ if (!name && !get_local_id) {
+ usage(argv[0], 0);
+ exit(-1);
+ }
+ /* u_getgmnid() returns 0 on each of its failure paths. */
+ nid = u_getgmnid(name, get_local_id);
+ printf("%u\n", nid);
+ exit(0); /* NOTE(review): also reached when nid is 0 (lookup failed) -- confirm a zero exit status is intended */
+}
- PORTAL_IOC_INIT(data);
- data.ioc_pbuf1 = (char*)&pcfg;
- data.ioc_plen1 = sizeof(pcfg);
-
- rc = ioctl (pfd, IOC_PORTAL_NAL_CMD, &data);
- if (rc < 0)
- {
- perror ("Can't get my NID");
+/*
+ * Look up a Myrinet (GM) global node id.
+ *
+ * name         - host name to resolve; only used when get_local_id is 0
+ * get_local_id - non-zero to look up the local host instead of 'name'
+ *
+ * Opens a GM port (2 first, then 4..7 as fallbacks), translates the
+ * local node id to a global id, and closes everything again.
+ *
+ * Returns the global node id, or 0 on any GM failure after printing the
+ * GM error string to stderr.
+ */
+unsigned
+u_getgmnid(char *name, int get_local_id)
+{
+        struct gm_port *gm_port;
+        int gm_port_id = 2;
+        gm_status_t gm_status = GM_SUCCESS;
+
+        /*
+         * gm global or local ids are never 0
+         */
+        unsigned global_nid = 0, local_nid = 0;
+
+        gm_status = gm_init();
+        if (gm_status != GM_SUCCESS) {
+                fprintf(stderr, "gm_init :: %s\n", gm_strerror(gm_status));
+                return(0);
+        }
+
+        gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id,
+                            "gmnalnid", GM_API_VERSION);
+
+        if (gm_status != GM_SUCCESS) {
+                /*
+                 * Couldn't open port 2
+                 * try 4 5 6 7 in turn and stop at the first that opens
+                 */
+                for (gm_port_id = 4; gm_port_id < 8; gm_port_id++) {
+                        gm_status = gm_open(&gm_port,
+                                            GM_UNIT,
+                                            gm_port_id,
+                                            "gmnalnid",
+                                            GM_API_VERSION);
+                        if (gm_status == GM_SUCCESS) {
+                                break;
+                        }
+                }
+
+                /*
+                 * Only give up once every fallback port has been tried;
+                 * report the status of the last attempt.
+                 */
+                if (gm_status != GM_SUCCESS) {
+                        fprintf(stderr, "gm_open :: %s\n",
+                                gm_strerror(gm_status));
+                        gm_finalize();
+                        return(0);
+                }
+        }
+
+        if (get_local_id) {
+                /*
+                 * NOTE(review): assumes the local host is always GM
+                 * node id 1 -- confirm against the GM documentation.
+                 */
+                local_nid = 1;
+        } else {
+                gm_status = gm_host_name_to_node_id_ex(gm_port, 1000000, name,
+                                                       &local_nid);
+                if (gm_status != GM_SUCCESS) {
+                        fprintf(stderr, "gm_host_name_to_node_id_ex :: %s\n",
+                                gm_strerror(gm_status));
+                        gm_close(gm_port);
+                        gm_finalize();
+                        return(0);
 }
-
- free(pcfg.pcfg_pbuf1);
- close(pfd);
- printf("%u\n", nid);
- exit(0);
+        }
+
+        gm_status = gm_node_id_to_global_id(gm_port, local_nid, &global_nid) ;
+        if (gm_status != GM_SUCCESS) {
+                fprintf(stderr, "gm_node_id_to_global_id :: %s\n",
+                        gm_strerror(gm_status));
+                gm_close(gm_port);
+                gm_finalize();
+                return(0);
+        }
+        gm_close(gm_port);
+        gm_finalize();
+        return(global_nid);
+}
+
+/*
+ * Write the one-line synopsis for 'prg' to stderr.  When h is non-zero,
+ * follow it with the two-line help text on stdout.
+ */
+void
+usage(char *prg, int h)
+{
+        fprintf(stderr, "usage %s -n hostname | -l | -h\n", prg);
+
+        /* Synopsis only unless help was explicitly requested. */
+        if (!h)
+                return;
+
+        printf("\nGet Myrinet Global network ids for specified host\n");
+        printf("-l gets network id for local host\n");
 }