From bba77ef43a947319e93c7605df5d42ab77ab33de Mon Sep 17 00:00:00 2001 From: eeb Date: Thu, 2 Oct 2003 15:00:13 +0000 Subject: [PATCH] * new lgmnal replaces gmnal --- lnet/archdep.m4 | 2 +- lnet/klnds/gmlnd/Makefile.am | 4 +- .../knals/lgmnal => lnet/klnds/gmlnd}/Makefile.mk | 4 +- lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch | 43 - lnet/klnds/gmlnd/gmlnd.h | 510 ++++++-- lnet/klnds/gmlnd/gmlnd_api.c | 474 +++++++ lnet/klnds/gmlnd/gmlnd_cb.c | 659 ++++------ lnet/klnds/gmlnd/gmlnd_comm.c | 1316 ++++++++++++++++++++ lnet/klnds/gmlnd/gmlnd_module.c | 147 +++ lnet/klnds/gmlnd/gmlnd_utils.c | 1007 +++++++++++++++ lnet/klnds/gmlnd/gmnal.c | 284 ----- lnet/klnds/lgmlnd/Makefile.am | 13 - lnet/klnds/lgmlnd/lgmnal.h | 463 ------- lnet/klnds/lgmlnd/lgmnal_api.c | 527 -------- lnet/klnds/lgmlnd/lgmnal_cb.c | 258 ---- lnet/klnds/lgmlnd/lgmnal_comm.c | 477 ------- lnet/klnds/lgmlnd/lgmnal_module.c | 137 -- lnet/klnds/lgmlnd/lgmnal_utils.c | 860 ------------- lnet/tests/startclient.sh | 9 +- lnet/tests/startserver.sh | 9 +- lnet/utils/.cvsignore | 1 + lnet/utils/Makefile.am | 4 +- lnet/utils/gmlndnid.c | 118 ++ lustre/portals/archdep.m4 | 2 +- lustre/portals/knals/gmnal/Makefile.am | 4 +- .../portals/knals/gmnal}/Makefile.mk | 4 +- .../portals/knals/gmnal/gm-1.5.2.1-exports.patch | 43 - lustre/portals/knals/gmnal/gmnal.c | 284 ----- lustre/portals/knals/gmnal/gmnal.h | 510 ++++++-- lustre/portals/knals/gmnal/gmnal_api.c | 474 +++++++ lustre/portals/knals/gmnal/gmnal_cb.c | 659 ++++------ lustre/portals/knals/gmnal/gmnal_comm.c | 1316 ++++++++++++++++++++ lustre/portals/knals/gmnal/gmnal_module.c | 147 +++ lustre/portals/knals/gmnal/gmnal_utils.c | 1007 +++++++++++++++ lustre/portals/knals/lgmnal/Makefile.am | 13 - lustre/portals/knals/lgmnal/lgmnal.h | 463 ------- lustre/portals/knals/lgmnal/lgmnal_api.c | 527 -------- lustre/portals/knals/lgmnal/lgmnal_cb.c | 258 ---- lustre/portals/knals/lgmnal/lgmnal_comm.c | 477 ------- lustre/portals/knals/lgmnal/lgmnal_module.c | 137 -- lustre/portals/knals/lgmnal/lgmnal_utils.c | 860 ------------- lustre/portals/tests/startclient.sh | 9 +- lustre/portals/tests/startserver.sh | 9 +- lustre/portals/utils/.cvsignore | 1 + lustre/portals/utils/Makefile.am | 4 +- lustre/portals/utils/gmnalnid.c | 118 ++ 46 files changed, 7470 insertions(+), 7182 deletions(-) rename {lustre/portals/knals/lgmnal => lnet/klnds/gmlnd}/Makefile.mk (63%) delete mode 100644 lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch create mode 100644 lnet/klnds/gmlnd/gmlnd_api.c create mode 100644 lnet/klnds/gmlnd/gmlnd_comm.c create mode 100644 lnet/klnds/gmlnd/gmlnd_module.c create mode 100644 lnet/klnds/gmlnd/gmlnd_utils.c delete mode 100644 lnet/klnds/gmlnd/gmnal.c delete mode 100644 lnet/klnds/lgmlnd/Makefile.am delete mode 100644 lnet/klnds/lgmlnd/lgmnal.h delete mode 100644 lnet/klnds/lgmlnd/lgmnal_api.c delete mode 100644 lnet/klnds/lgmlnd/lgmnal_cb.c delete mode 100644 lnet/klnds/lgmlnd/lgmnal_comm.c delete mode 100644 lnet/klnds/lgmlnd/lgmnal_module.c delete mode 100644 lnet/klnds/lgmlnd/lgmnal_utils.c create mode 100644 lnet/utils/gmlndnid.c rename {lnet/klnds/lgmlnd => lustre/portals/knals/gmnal}/Makefile.mk (63%) delete mode 100644 lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch delete mode 100644 lustre/portals/knals/gmnal/gmnal.c create mode 100644 lustre/portals/knals/gmnal/gmnal_api.c create mode 100644 lustre/portals/knals/gmnal/gmnal_comm.c create mode 100644 lustre/portals/knals/gmnal/gmnal_module.c create mode 100644 lustre/portals/knals/gmnal/gmnal_utils.c delete mode 100644 
lustre/portals/knals/lgmnal/Makefile.am delete mode 100644 lustre/portals/knals/lgmnal/lgmnal.h delete mode 100644 lustre/portals/knals/lgmnal/lgmnal_api.c delete mode 100644 lustre/portals/knals/lgmnal/lgmnal_cb.c delete mode 100644 lustre/portals/knals/lgmnal/lgmnal_comm.c delete mode 100644 lustre/portals/knals/lgmnal/lgmnal_module.c delete mode 100644 lustre/portals/knals/lgmnal/lgmnal_utils.c create mode 100644 lustre/portals/utils/gmnalnid.c diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 41349fd..7910823 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -286,7 +286,7 @@ if test "${with_gm+set}" = set; then if test "${with_gm}" = yes; then with_gm="-I/usr/local/gm/include" else - with_gm=-I"$with_gm/include" + with_gm="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm" fi GMNAL="gmnal" else diff --git a/lnet/klnds/gmlnd/Makefile.am b/lnet/klnds/gmlnd/Makefile.am index 1dc6f4e..bac4680 100644 --- a/lnet/klnds/gmlnd/Makefile.am +++ b/lnet/klnds/gmlnd/Makefile.am @@ -9,5 +9,5 @@ MODULE = kgmnal modulenet_DATA = kgmnal.o EXTRA_PROGRAMS = kgmnal -DEFS = -kgmnal_SOURCES = gmnal.c gmnal_cb.c gmnal.h +DEFS = -DGM_KERNEL +kgmnal_SOURCES = gmnal.h gmnal_api.c gmnal_cb.c gmnal_comm.c gmnal_utils.c gmnal_module.c diff --git a/lustre/portals/knals/lgmnal/Makefile.mk b/lnet/klnds/gmlnd/Makefile.mk similarity index 63% rename from lustre/portals/knals/lgmnal/Makefile.mk rename to lnet/klnds/gmlnd/Makefile.mk index c8ca67f..b799a47 100644 --- a/lustre/portals/knals/lgmnal/Makefile.mk +++ b/lnet/klnds/gmlnd/Makefile.mk @@ -5,6 +5,6 @@ include ../../Kernelenv -obj-y += lgmnal.o -lgmnal-objs := lgmnal_api.o lgmnal_cb.o lgmnal_utils.o lgmnal_comm.o lgmnal_module.o +obj-y += gmnal.o +gmnal-objs := gmnal_api.o gmnal_cb.o gmnal_utils.o gmnal_comm.o gmnal_module.o diff --git a/lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch b/lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch deleted file mode 100644 index 23c80d9..0000000 --- a/lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch +++ /dev/null @@ -1,43 +0,0 @@ -diff -ru gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c ---- gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c Mon Jul 1 10:35:09 2002 -+++ gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c Thu Sep 19 14:19:38 2002 -@@ -30,6 +30,8 @@ - * - ************************************************************************/ - -+#define EXPORT_SYMTAB -+ - #include - #include - -@@ -4075,6 +4077,28 @@ - return 0; - } - -+EXPORT_SYMBOL(gm_blocking_receive_no_spin); -+EXPORT_SYMBOL(gm_close); -+EXPORT_SYMBOL(gm_dma_free); -+EXPORT_SYMBOL(gm_dma_malloc); -+EXPORT_SYMBOL(gm_drop_sends); -+EXPORT_SYMBOL(gm_finalize); -+EXPORT_SYMBOL(gm_get_node_id); -+EXPORT_SYMBOL(gm_init); -+EXPORT_SYMBOL(gm_initialize_alarm); -+EXPORT_SYMBOL(gm_max_node_id_in_use); -+EXPORT_SYMBOL(gm_min_size_for_length); -+EXPORT_SYMBOL(gm_num_receive_tokens); -+EXPORT_SYMBOL(gm_num_send_tokens); -+EXPORT_SYMBOL(gm_open); -+EXPORT_SYMBOL(gm_provide_receive_buffer); -+EXPORT_SYMBOL(gm_resume_sending); -+EXPORT_SYMBOL(gm_send_with_callback); -+EXPORT_SYMBOL(gm_set_acceptable_sizes); -+EXPORT_SYMBOL(gm_set_alarm); -+EXPORT_SYMBOL(gm_unknown); -+ -+ - /* - This file uses GM standard indentation. 
- -Only in gm-1.5.2.1_Linux-cfs/drivers/linux/gm: gm_arch.c~ -Only in gm-1.5.2.1_Linux-cfs/: trace diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index 47e8c3c..fdde839 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -1,101 +1,455 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifndef _GMNAL_H -#define _GMNAL_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include + + +/* + * Portals GM kernel NAL header file + * This file makes all declaration and prototypes + * for the API side and CB side of the NAL + */ +#ifndef __INCLUDE_GMNAL_H__ +#define __INCLUDE_GMNAL_H__ + +#include "linux/config.h" +#include "linux/module.h" +#include "linux/tty.h" +#include "linux/kernel.h" +#include "linux/mm.h" +#include "linux/string.h" +#include "linux/stat.h" +#include "linux/errno.h" +#include "linux/locks.h" +#include "linux/unistd.h" +#include "linux/init.h" +#include "linux/sem.h" +#include "linux/vmalloc.h" +#ifdef MODVERSIONS +#include +#endif #define DEBUG_SUBSYSTEM S_GMNAL -#include -#include -#include +#include "portals/nal.h" +#include "portals/api.h" +#include "portals/errno.h" +#include "linux/kp30.h" +#include "portals/p30.h" + +#include "portals/lib-nal.h" +#include "portals/lib-p30.h" + +#define GM_STRONG_TYPES 1 +#include "gm.h" +#include "gm_internal.h" + + +/* + * Defines for the API NAL + */ + +/* + * Small message size is configurable + * insmod can set small_msg_size + * which is used to populate nal_data.small_msg_size + */ +#define GMNAL_SMALL_MESSAGE 1078 +#define GMNAL_LARGE_MESSAGE_INIT 1079 +#define GMNAL_LARGE_MESSAGE_ACK 1080 +#define GMNAL_LARGE_MESSAGE_FINI 1081 + +extern int gmnal_small_msg_size; +extern int num_rx_threads; +extern int num_stxds; +#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size +#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c) +#define GMNAL_MAGIC 0x1234abcd + + +/* + * Small Transmit Descriptor + * A structre to keep track of a small transmit operation + * This structure has a one-to-one relationship with a small + * transmit buffer (both create by gmnal_stxd_alloc). + * There are two free list of stxd. One for use by clients of the NAL + * and the other by the NAL rxthreads when doing sends. + * This helps prevent deadlock caused by stxd starvation. 
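/*
 * A minimal sketch of how the two transmit-descriptor pools could be
 * drawn from, assuming the second argument of gmnal_get_stxd() marks
 * an rx-thread caller; the function name below is illustrative and
 * the real implementation lives in gmnal_utils.c.  The token
 * semaphore bounds each pool and the spinlock protects its freelist,
 * so an rx thread that must transmit never competes with NAL clients
 * for descriptors.
 */
gmnal_stxd_t *
gmnal_get_stxd_sketch(gmnal_data_t *nal_data, int rxt)
{
        gmnal_stxd_t *txd;

        if (rxt) {
                /* reserve pool for sends issued from the rx threads */
                GMNAL_RXT_TXD_GETTOKEN(nal_data);
                GMNAL_RXT_TXD_LOCK(nal_data);
                txd = nal_data->rxt_stxd;
                nal_data->rxt_stxd = txd->next;
                GMNAL_RXT_TXD_UNLOCK(nal_data);
        } else {
                /* general pool used by clients of the NAL */
                GMNAL_TXD_GETTOKEN(nal_data);
                GMNAL_TXD_LOCK(nal_data);
                txd = nal_data->stxd;
                nal_data->stxd = txd->next;
                GMNAL_TXD_UNLOCK(nal_data);
        }
        txd->rxt = rxt;
        return(txd);
}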
+ */ +typedef struct _gmnal_stxd_t { + void *buffer; + int buffer_size; + gm_size_t gm_size; + int msg_size; + int gm_target_node; + int gm_priority; + int type; + struct _gmnal_data_t *nal_data; + lib_msg_t *cookie; + int niov; + struct iovec iov[PTL_MD_MAX_IOV]; + struct _gmnal_srxd_t *srxd; + struct _gmnal_stxd_t *next; + int rxt; + int kniov; + struct iovec *iovec_dup; +} gmnal_stxd_t; + +/* + * as for gmnal_stxd_t + * a hash table in nal_data find srxds from + * the rx buffer address. hash table populated at init time + */ +typedef struct _gmnal_srxd_t { + void *buffer; + int size; + gm_size_t gmsize; + unsigned int gm_source_node; + gmnal_stxd_t *source_stxd; + int type; + int nsiov; + int nriov; + struct iovec *riov; + int ncallbacks; + spinlock_t callback_lock; + int callback_status; + lib_msg_t *cookie; + struct _gmnal_srxd_t *next; + struct _gmnal_data_t *nal_data; +} gmnal_srxd_t; + +/* + * Header which lmgnal puts at the start of each message + */ +typedef struct _gmnal_msghdr { + int magic; + int type; + unsigned int sender_node_id; + gmnal_stxd_t *stxd; + int niov; + } gmnal_msghdr_t; +#define GMNAL_MSGHDR_SIZE sizeof(gmnal_msghdr_t) + +/* + * the caretaker thread (ct_thread) gets receive events + * (and other events) from the myrinet device via the GM2 API. + * caretaker thread populates one work entry for each receive event, + * puts it on a Q in nal_data and wakes a receive thread to + * process the receive. + * Processing a portals receive can involve a transmit operation. + * Because of this the caretaker thread cannot process receives + * as it may get deadlocked when supply of transmit descriptors + * is exhausted (as caretaker thread is responsible for replacing + * transmit descriptors on the free list) + */ +typedef struct _gmnal_rxtwe { + gm_recv_event_t *rx; + struct _gmnal_rxtwe *next; +} gmnal_rxtwe_t; + +/* + * 1 receive thread started on each CPU + */ +#define NRXTHREADS 10 /* max number of receiver threads */ + +typedef struct _gmnal_data_t { + int refcnt; + spinlock_t cb_lock; + spinlock_t stxd_lock; + struct semaphore stxd_token; + gmnal_stxd_t *stxd; + spinlock_t rxt_stxd_lock; + struct semaphore rxt_stxd_token; + gmnal_stxd_t *rxt_stxd; + spinlock_t srxd_lock; + struct semaphore srxd_token; + gmnal_srxd_t *srxd; + struct gm_hash *srxd_hash; + nal_t *nal; + nal_cb_t *nal_cb; + struct gm_port *gm_port; + unsigned int gm_local_nid; + unsigned int gm_global_nid; + spinlock_t gm_lock; + long rxthread_pid[NRXTHREADS]; + int rxthread_stop_flag; + spinlock_t rxthread_flag_lock; + long rxthread_flag; + long ctthread_pid; + int ctthread_flag; + gm_alarm_t ctthread_alarm; + int small_msg_size; + int small_msg_gmsize; + gmnal_rxtwe_t *rxtwe_head; + gmnal_rxtwe_t *rxtwe_tail; + spinlock_t rxtwe_lock; + struct semaphore rxtwe_wait; +} gmnal_data_t; + +/* + * Flags to start/stop and check status of threads + * each rxthread sets 1 bit (any bit) of the flag on startup + * and clears 1 bit when exiting + */ +#define GMNAL_THREAD_RESET 0 +#define GMNAL_THREAD_STOP 666 +#define GMNAL_CTTHREAD_STARTED 333 +#define GMNAL_RXTHREADS_STARTED ( (1< +/* + * FUNCTION PROTOTYPES + */ + +/* + * Locking macros + */ /* - * Myrinet GM NAL + * For the Small tx and rx descriptor lists */ -#define NPAGES_LARGE 16 -#define NPAGES_SMALL 1 -#define MSG_LEN_LARGE NPAGES_LARGE*PAGE_SIZE -#define MSG_LEN_SMALL NPAGES_SMALL*PAGE_SIZE -#define MSG_SIZE_LARGE (gm_min_size_for_length(MSG_LEN_LARGE)) -#define MSG_SIZE_SMALL (gm_min_size_for_length(MSG_LEN_SMALL)) +#define GMNAL_TXD_LOCK_INIT(a) 
spin_lock_init(&a->stxd_lock); +#define GMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); +#define GMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); +#define GMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); +#define GMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); +#define GMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) +#define GMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); -#define TXMSGS 64 /* Number of Transmit Messages */ -#define ENVELOPES 8 /* Number of outstanding receive msgs */ +#define GMNAL_RXT_TXD_LOCK_INIT(a) spin_lock_init(&a->rxt_stxd_lock); +#define GMNAL_RXT_TXD_LOCK(a) spin_lock(&a->rxt_stxd_lock); +#define GMNAL_RXT_TXD_UNLOCK(a) spin_unlock(&a->rxt_stxd_lock); +#define GMNAL_RXT_TXD_TOKEN_INIT(a, n) sema_init(&a->rxt_stxd_token, n); +#define GMNAL_RXT_TXD_GETTOKEN(a) down(&a->rxt_stxd_token); +#define GMNAL_RXT_TXD_TRYGETTOKEN(a) down_trylock(&a->rxt_stxd_token) +#define GMNAL_RXT_TXD_RETURNTOKEN(a) up(&a->rxt_stxd_token); -#define KGM_PORT_NUM 3 -#define KGM_HOSTNAME "kgmnal" +#define GMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); +#define GMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); +#define GMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); +#define GMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); +#define GMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); +#define GMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) +#define GMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); +#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); +#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock); +#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock); +#define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); -typedef struct { - char *krx_buffer; - unsigned long krx_len; - unsigned int krx_size; - unsigned int krx_priority; - struct list_head krx_item; -} kgmnal_rx_t; +/* + * Memory Allocator + */ + +/* + * API NAL + */ +int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); + +int gmnal_api_shutdown(nal_t *, int); + +int gmnal_api_validate(nal_t *, void *, size_t); + +void gmnal_api_yield(nal_t *); + +void gmnal_api_lock(nal_t *, unsigned long *); + +void gmnal_api_unlock(nal_t *, unsigned long *); + + +#define GMNAL_INIT_NAL(a) do { \ + a->forward = gmnal_api_forward; \ + a->shutdown = gmnal_api_shutdown; \ + a->validate = NULL; \ + a->yield = gmnal_api_yield; \ + a->lock = gmnal_api_lock; \ + a->unlock = gmnal_api_unlock; \ + a->timeout = NULL; \ + a->refct = 1; \ + a->nal_data = NULL; \ + } while (0) + + +/* + * CB NAL + */ + +int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); + +int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); + +int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, + unsigned int, struct iovec *, size_t, size_t); + +int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, + unsigned int, ptl_kiov_t *, size_t, size_t); + +int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); + +int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); + +int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); + +void *gmnal_cb_malloc(nal_cb_t *, size_t); + +void gmnal_cb_free(nal_cb_t *, void *, size_t); + +void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); + +int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); + +void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...); + +void gmnal_cb_cli(nal_cb_t *, unsigned long *); 
+ +void gmnal_cb_sti(nal_cb_t *, unsigned long *); + +int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); + +nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); + +void gmnal_fini(void); + + + +#define GMNAL_INIT_NAL_CB(a) do { \ + a->cb_send = gmnal_cb_send; \ + a->cb_send_pages = gmnal_cb_send_pages; \ + a->cb_recv = gmnal_cb_recv; \ + a->cb_recv_pages = gmnal_cb_recv_pages; \ + a->cb_read = gmnal_cb_read; \ + a->cb_write = gmnal_cb_write; \ + a->cb_callback = gmnal_cb_callback; \ + a->cb_malloc = gmnal_cb_malloc; \ + a->cb_free = gmnal_cb_free; \ + a->cb_map = NULL; \ + a->cb_unmap = NULL; \ + a->cb_printf = gmnal_cb_printf; \ + a->cb_cli = gmnal_cb_cli; \ + a->cb_sti = gmnal_cb_sti; \ + a->cb_dist = gmnal_cb_dist; \ + a->nal_data = NULL; \ + } while (0) + + +/* + * Small Transmit and Receive Descriptor Functions + */ +int gmnal_alloc_stxd(gmnal_data_t *); +void gmnal_free_stxd(gmnal_data_t *); +gmnal_stxd_t* gmnal_get_stxd(gmnal_data_t *, int); +void gmnal_return_stxd(gmnal_data_t *, gmnal_stxd_t *); + +int gmnal_alloc_srxd(gmnal_data_t *); +void gmnal_free_srxd(gmnal_data_t *); +gmnal_srxd_t* gmnal_get_srxd(gmnal_data_t *, int); +void gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *); + +/* + * general utility functions + */ +gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*); +void gmnal_stop_rxthread(gmnal_data_t *); +void gmnal_stop_ctthread(gmnal_data_t *); +void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); +void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t); +char *gmnal_gm_error(gm_status_t); +char *gmnal_rxevent(gm_recv_event_t*); +int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int); +void gmnal_yield(int); +int gmnal_start_kernel_threads(gmnal_data_t *); + + +/* + * Communication functions + */ + +/* + * Receive threads + */ +int gmnal_ct_thread(void *); /* caretaker thread */ +int gmnal_rx_thread(void *); /* receive thread */ +int gmnal_pre_receive(gmnal_data_t*, gm_recv_t*, int); +int gmnal_rx_bad(gmnal_data_t *, gm_recv_t *, gmnal_srxd_t *); +int gmnal_rx_requeue_buffer(gmnal_data_t *, gmnal_srxd_t *); +int gmnal_add_rxtwe(gmnal_data_t *, gm_recv_event_t *); +gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_data_t *); +void gmnal_remove_rxtwe(gmnal_data_t *); + + +/* + * Small messages + */ +int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, + struct iovec *, size_t, size_t); +int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, + unsigned int, struct iovec*, int); +void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); + + + +/* + * Large messages + */ +int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, + struct iovec *, size_t, size_t); -typedef struct { - nal_cb_t *ktx_nal; - void *ktx_private; - lib_msg_t *ktx_cookie; - char *ktx_buffer; - size_t ktx_len; - unsigned long ktx_size; - int ktx_ndx; - unsigned int ktx_priority; - unsigned int ktx_tgt_node; - unsigned int ktx_tgt_port_id; -} kgmnal_tx_t; +int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, + struct iovec*, int); +void gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t); -typedef struct { - char kgm_init; - char kgm_shuttingdown; - struct gm_port *kgm_port; - struct list_head kgm_list; - ptl_nid_t kgm_nid; - nal_cb_t *kgm_cb; - struct kgm_trans *kgm_trans; - struct tq_struct kgm_ready_tq; - spinlock_t kgm_dispatch_lock; - spinlock_t kgm_update_lock; - spinlock_t kgm_send_lock; -} kgmnal_data_t; +int 
gmnal_remote_get(gmnal_srxd_t *, int, struct iovec*, int, + struct iovec*); -int kgm_init(kgmnal_data_t *kgm_data); -int kgmnal_recv_thread(void *); -int gm_return_mynid(void); -void kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); +void gmnal_remote_get_callback(gm_port_t *, void *, gm_status_t); -extern kgmnal_data_t kgmnal_data; -extern nal_t kgmnal_api; -extern nal_cb_t kgmnal_lib; +int gmnal_copyiov(int, gmnal_srxd_t *, int, struct iovec*, int, + struct iovec*); -#endif /* _GMNAL_H */ +void gmnal_large_tx_ack(gmnal_data_t *, gmnal_srxd_t *); +void gmnal_large_tx_ack_callback(gm_port_t *, void *, gm_status_t); +void gmnal_large_tx_ack_received(gmnal_data_t *, gmnal_srxd_t *); +#endif /*__INCLUDE_GMNAL_H__*/ diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c new file mode 100644 index 0000000..40d23db --- /dev/null +++ b/lnet/klnds/gmlnd/gmlnd_api.c @@ -0,0 +1,474 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * Implements the API NAL functions + */ + +#include "gmnal.h" + +gmnal_data_t *global_nal_data = NULL; +/* + * gmnal_api_forward + * This function takes a pack block of arguments from the NAL API + * module and passes them to the NAL CB module. The CB module unpacks + * the args and calls the appropriate function indicated by index. + * Typically this function is used to pass args between kernel and use + * space. 
+ * As lgmanl exists entirely in kernel, just pass the arg block directly + * to the NAL CB, buy passing the args to lib_dispatch + * Arguments are + * nal_t nal Our nal + * int index the api function that initiated this call + * void *args packed block of function args + * size_t arg_len length of args block + * void *ret A return value for the API NAL + * size_t ret_len Size of the return value + * + */ + +int +gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, + void *ret, size_t ret_len) +{ + + nal_cb_t *nal_cb = NULL; + gmnal_data_t *nal_data = NULL; + + + + + + if (!nal || !args || (index < 0) || (arg_len < 0)) { + CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n"); + return (PTL_FAIL); + } + + if (ret && (ret_len <= 0)) { + CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n"); + return (PTL_FAIL); + } + + + if (!nal->nal_data) { + CDEBUG(D_ERROR, "bad nal, no nal data\n"); + return (PTL_FAIL); + } + + nal_data = nal->nal_data; + CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data); + + if (!nal_data->nal_cb) { + CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n"); + return (PTL_FAIL); + } + + nal_cb = nal_data->nal_cb; + CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb); + + CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n"); + lib_dispatch(nal_cb, NULL, index, args, ret); + CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n"); + + return(PTL_OK); +} + + +/* + * gmnal_api_shutdown + * Close down this interface and free any resources associated with it + * nal_t nal our nal to shutdown + */ +int +gmnal_api_shutdown(nal_t *nal, int interface) +{ + + gmnal_data_t *nal_data = nal->nal_data; + + CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); + + return(PTL_OK); +} + + +/* + * gmnal_api_validate + * validate a user address for use in communications + * There's nothing to be done here + */ +int +gmnal_api_validate(nal_t *nal, void *base, size_t extent) +{ + + return(PTL_OK); +} + + + +/* + * gmnal_api_yield + * Give up the processor + */ +void +gmnal_api_yield(nal_t *nal) +{ + CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal); + + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + + return; +} + + + +/* + * gmnal_api_lock + * Take a threadsafe lock + */ +void +gmnal_api_lock(nal_t *nal, unsigned long *flags) +{ + + gmnal_data_t *nal_data; + nal_cb_t *nal_cb; + + nal_data = nal->nal_data; + nal_cb = nal_data->nal_cb; + + nal_cb->cb_cli(nal_cb, flags); + + return; +} + +/* + * gmnal_api_unlock + * Release a threadsafe lock + */ +void +gmnal_api_unlock(nal_t *nal, unsigned long *flags) +{ + gmnal_data_t *nal_data; + nal_cb_t *nal_cb; + + nal_data = nal->nal_data; + nal_cb = nal_data->nal_cb; + + nal_cb->cb_sti(nal_cb, flags); + + return; +} + + +nal_t * +gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, + ptl_pid_t rpid) +{ + + nal_t *nal = NULL; + nal_cb_t *nal_cb = NULL; + gmnal_data_t *nal_data = NULL; + gmnal_srxd_t *srxd = NULL; + gm_status_t gm_status; + unsigned int local_nid = 0, global_nid = 0; + ptl_nid_t portals_nid; + ptl_pid_t portals_pid = 0; + + + CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], + ac_size[%d]\n", interface, ptl_size, ac_size); + + + PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); + if (!nal_data) { + CDEBUG(D_ERROR, "can't get memory\n"); + return(NULL); + } + memset(nal_data, 0, sizeof(gmnal_data_t)); + /* + * set the small message buffer size + */ + nal_data->refcnt = 1; + + CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); + CDEBUG(D_INFO, "small_msg_size is [%d]\n", 
nal_data->small_msg_size); + + PORTAL_ALLOC(nal, sizeof(nal_t)); + if (!nal) { + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + return(NULL); + } + memset(nal, 0, sizeof(nal_t)); + CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal); + + PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); + if (!nal_cb) { + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + return(NULL); + } + memset(nal_cb, 0, sizeof(nal_cb_t)); + CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb); + + GMNAL_INIT_NAL(nal); + GMNAL_INIT_NAL_CB(nal_cb); + /* + * String them all together + */ + nal->nal_data = (void*)nal_data; + nal_cb->nal_data = (void*)nal_data; + nal_data->nal = nal; + nal_data->nal_cb = nal_cb; + + GMNAL_CB_LOCK_INIT(nal_data); + GMNAL_GM_LOCK_INIT(nal_data); + + + /* + * initialise the interface, + */ + CDEBUG(D_INFO, "Calling gm_init\n"); + if (gm_init() != GM_SUCCESS) { + CDEBUG(D_ERROR, "call to gm_init failed\n"); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], + name [%s], version [%d]\n", interface, GMNAL_GM_PORT, + "gmnal", GM_API_VERSION); + + GMNAL_GM_LOCK(nal_data); + gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal", + GM_API_VERSION); + GMNAL_GM_UNLOCK(nal_data); + + CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status); + if (gm_status == GM_SUCCESS) { + CDEBUG(D_INFO, "gm_open succeeded port[%p]\n", + nal_data->gm_port); + } else { + switch(gm_status) { + case(GM_INVALID_PARAMETER): + CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n"); + break; + case(GM_BUSY): + CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n"); + break; + case(GM_NO_SUCH_DEVICE): + CDEBUG(D_ERROR, "gm_open Failure. No such device\n"); + break; + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib + and driver\n"); + break; + case(GM_OUT_OF_MEMORY): + CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n"); + break; + default: + CDEBUG(D_ERROR, "gm_open Failure. 
Unknow error + code [%d]\n", gm_status); + break; + } + GMNAL_GM_LOCK(nal_data); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + nal_data->small_msg_size = gmnal_small_msg_size; + nal_data->small_msg_gmsize = + gm_min_size_for_length(gmnal_small_msg_size); + + if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) { + CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n"); + gmnal_free_stxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + /* + * Hang out a bunch of small receive buffers + * In fact hang them all out + */ + while((srxd = gmnal_get_srxd(nal_data, 0))) { + CDEBUG(D_NET, "giving [%p] to gm_provide_recvive_buffer\n", + srxd->buffer); + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, + srxd->buffer, srxd->gmsize, + GM_LOW_PRIORITY, 0); + GMNAL_GM_UNLOCK(nal_data); + } + + /* + * Allocate pools of small tx buffers and descriptors + */ + if (gmnal_alloc_stxd(nal_data) != GMNAL_STATUS_OK) { + CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n"); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + gmnal_start_kernel_threads(nal_data); + + while (nal_data->rxthread_flag != GMNAL_RXTHREADS_STARTED) { + gmnal_yield(1); + CDEBUG(D_INFO, "Waiting for receive thread signs of life\n"); + } + + CDEBUG(D_INFO, "receive thread seems to have started\n"); + + + /* + * Initialise the portals library + */ + CDEBUG(D_NET, "Getting node id\n"); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + CDEBUG(D_ERROR, "can't determine node id\n"); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + nal_data->gm_local_nid = local_nid; + CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, + &global_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "failed to obtain global id\n"); + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid); + nal_data->gm_global_nid = global_nid; + +/* + pid = gm_getpid(); +*/ + CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid); + portals_nid = (unsigned long)global_nid; + CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid); + + CDEBUG(D_PORTALS, "calling lib_init\n"); 
+ if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, + ac_size) != PTL_OK) { + CDEBUG(D_ERROR, "lib_init failed\n"); + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + + } + + CDEBUG(D_INFO, "gmnal_init finished\n"); + global_nal_data = nal->nal_data; + return(nal); +} + + + +/* + * Called when module removed + */ +void gmnal_fini() +{ + gmnal_data_t *nal_data = global_nal_data; + nal_t *nal = nal_data->nal; + nal_cb_t *nal_cb = nal_data->nal_cb; + + CDEBUG(D_TRACE, "gmnal_fini\n"); + + PtlNIFini(kgmnal_ni); + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); +} + +EXPORT_SYMBOL(gmnal_init); +EXPORT_SYMBOL(gmnal_fini); +EXPORT_SYMBOL(gmnal_api_forward); +EXPORT_SYMBOL(gmnal_api_validate); +EXPORT_SYMBOL(gmnal_api_yield); +EXPORT_SYMBOL(gmnal_api_lock); +EXPORT_SYMBOL(gmnal_api_unlock); +EXPORT_SYMBOL(gmnal_api_shutdown); diff --git a/lnet/klnds/gmlnd/gmlnd_cb.c b/lnet/klnds/gmlnd/gmlnd_cb.c index 4728eca..093ee64 100644 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ b/lnet/klnds/gmlnd/gmlnd_cb.c @@ -1,517 +1,290 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Based on ksocknal and qswnal + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read + * This file is part of Lustre, http://www.lustre.org/ * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or + * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * - * Portals is distributed in the hope that it will be useful, + * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software + * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* TODO - * preallocate send buffers, store on list - * put receive buffers on queue, handle with receive threads - * use routing - */ - -#include "gmnal.h" - -extern kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *,int); - -static kgmnal_tx_t * -get_trans(void) -{ - kgmnal_tx_t *t; - PORTAL_ALLOC(t, (sizeof(kgmnal_tx_t))); - return t; -} - -static void -put_trans(kgmnal_tx_t *t) -{ - PORTAL_FREE(t, sizeof(kgmnal_tx_t)); -} - -int -kgmnal_ispeer (ptl_nid_t nid) -{ - unsigned int gmnid = (unsigned int)nid; - unsigned int nnids; - - gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - return ((ptl_nid_t)gmnid == nid &&/* didn't lose high bits on conversion ? 
*/ - gmnid < nnids); /* it's in this machine */ -} /* - * LIB functions follow - * + * This file implements the nal cb functions */ -static int -kgmnal_read (nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - -static int -kgmnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} -static void * -kgmnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - PORTAL_ALLOC(buf, len); - return buf; -} +#include "gmnal.h" -static void -kgmnal_free(nal_cb_t *nal, void *buf, size_t len) +int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int niov, struct iovec *iov, size_t mlen, + size_t rlen) { - PORTAL_FREE(buf, len); + gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; + int status = PTL_OK; + + + CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], + niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + nal_cb, private, cookie, niov, iov, mlen, rlen); + + switch(srxd->type) { + case(GMNAL_SMALL_MESSAGE): + CDEBUG(D_INFO, "gmnal_cb_recv got small message\n"); + status = gmnal_small_rx(nal_cb, private, cookie, niov, + iov, mlen, rlen); + break; + case(GMNAL_LARGE_MESSAGE_INIT): + CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n"); + status = gmnal_large_rx(nal_cb, private, cookie, niov, + iov, mlen, rlen); + } + + + CDEBUG(D_INFO, "gmnal_cb_recv gmnal_return status [%d]\n", status); + return(status); } -static void -kgmnal_printf(nal_cb_t *nal, const char *fmt, ...) 
+int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, + size_t rlen) { - va_list ap; - char msg[256]; - - if (portal_debug & D_NET) { - va_start( ap, fmt ); - vsnprintf( msg, sizeof(msg), fmt, ap ); - va_end( ap ); - - printk("Lustre: CPUId: %d %s",smp_processor_id(), msg); - } + gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; + int status = PTL_OK; + struct iovec *iovec = NULL, *iovec_dup = NULL; + int i = 0; + + + CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], + cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + nal_cb, private, cookie, kniov, kiov, mlen, rlen); + + if (srxd->type == GMNAL_SMALL_MESSAGE) { + PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov); + if (!iovec) { + CDEBUG(D_ERROR, "Can't malloc\n"); + return(GMNAL_STATUS_FAIL); + } + iovec_dup = iovec; + + /* + * map each page and create an iovec for it + */ + for (i=0; ikiov_page, kiov->kiov_len, + kiov->kiov_offset); + iovec->iov_len = kiov->kiov_len; + CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page); + + iovec->iov_base = kmap(kiov->kiov_page) + + kiov->kiov_offset; + + CDEBUG(D_INFO, "iov_base is [%p]\n", iovec->iov_base); + iovec++; + kiov++; + } + CDEBUG(D_INFO, "calling gmnal_small_rx\n"); + status = gmnal_small_rx(nal_cb, private, cookie, kniov, + iovec_dup, mlen, rlen); + PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov); + } + + + CDEBUG(D_INFO, "gmnal_return status [%d]\n", status); + return(status); } -static void -kgmnal_cli(nal_cb_t *nal, unsigned long *flags) +int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, size_t len) { - kgmnal_data_t *data= nal->nal_data; - spin_lock_irqsave(&data->kgm_dispatch_lock,*flags); + gmnal_data_t *nal_data; + + + CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n", + niov, len, nid); + nal_data = nal_cb->nal_data; + + if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { + CDEBUG(D_INFO, "This is a small message send\n"); + gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid, + niov, iov, len); + } else { + CDEBUG(D_ERROR, "Large message send it is not supported\n"); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, + niov, iov, len); + } + return(PTL_OK); } - -static void -kgmnal_sti(nal_cb_t *nal, unsigned long *flags) +int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len) { - kgmnal_data_t *data= nal->nal_data; - spin_unlock_irqrestore(&data->kgm_dispatch_lock,*flags); + int i = 0; + gmnal_data_t *nal_data; + struct iovec *iovec = NULL, *iovec_dup = NULL; + + CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len); + nal_data = nal_cb->nal_data; + PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec)); + iovec_dup = iovec; + if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) { + CDEBUG(D_INFO, "This is a small message send\n"); + + for (i=0; ikiov_page, kiov->kiov_len, + kiov->kiov_offset); + + iovec->iov_base = kmap(kiov->kiov_page) + + kiov->kiov_offset; + + iovec->iov_len = kiov->kiov_len; + iovec++; + kiov++; + } + gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, + pid, kniov, iovec_dup, len); + } else { + CDEBUG(D_ERROR, "Large message send it is not supported yet\n"); + return(PTL_FAIL); 
+ for (i=0; ikiov_page, kiov->kiov_len, + kiov->kiov_offset); + + iovec->iov_base = kmap(kiov->kiov_page) + + kiov->kiov_offset; + iovec->iov_len = kiov->kiov_len; + iovec++; + kiov++; + } + gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, + pid, kniov, iovec, len); + } + PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec)); + return(PTL_OK); } - -static int -kgmnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) +int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, + user_ptr src, size_t len) { - /* network distance doesn't mean much for this nal */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; + gm_bcopy(src, dst, len); + return(PTL_OK); } -/* FIXME rmr: add rounting code here */ -static void -kgmnal_tx_done(kgmnal_tx_t *trans, int error) -{ - lib_finalize(trans->ktx_nal, trans->ktx_private, trans->ktx_cookie); - - gm_dma_free(kgmnal_data.kgm_port, trans->ktx_buffer); - - trans->ktx_buffer = NULL; - trans->ktx_len = 0; - - put_trans(trans); -} -static char * gm_error_strings[GM_NUM_STATUS_CODES] = { - [GM_SUCCESS] = "GM_SUCCESS", - [GM_SEND_TIMED_OUT] = "GM_SEND_TIMED_OUT", - [GM_SEND_REJECTED] = "GM_SEND_REJECTED", - [GM_SEND_TARGET_PORT_CLOSED] = "GM_SEND_TARGET_PORT_CLOSED", - [GM_SEND_TARGET_NODE_UNREACHABLE] = "GM_SEND_TARGET_NODE_UNREACHABLE", - [GM_SEND_DROPPED] = "GM_SEND_DROPPED", - [GM_SEND_PORT_CLOSED] = "GM_SEND_PORT_CLOSED", -}; - -inline char * get_error(int status) +int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, + void *src, size_t len) { - if (gm_error_strings[status] != NULL) - return gm_error_strings[status]; - else - return "Unknown error"; + gm_bcopy(src, dst, len); + return(PTL_OK); } -static void -kgmnal_errhandler(struct gm_port *p, void *context, gm_status_t status) +int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, + ptl_event_t *ev) { - CDEBUG(D_NET,"error callback: ktx %p status %d\n", context, status); -} -static void -kgmnal_txhandler(struct gm_port *p, void *context, gm_status_t status) -{ - kgmnal_tx_t *ktx = (kgmnal_tx_t *)context; - int err = 0; - - LASSERT (p != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET,"ktx %p status %d nid 0x%x pid %d\n", ktx, status, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id); - - switch((int)status) { - case GM_SUCCESS: /* normal */ - break; - case GM_SEND_TIMED_OUT: /* application error */ - case GM_SEND_REJECTED: /* size of msg unacceptable */ - case GM_SEND_TARGET_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_resume_sending(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_TARGET_NODE_UNREACHABLE: - case GM_SEND_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_drop_sends(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_DROPPED: - CERROR("%s (%d):\n", get_error(status), status); - err = -EIO; - break; - default: - CERROR("Unknown status: %d\n", status); - err = -EIO; - break; - } - - kgmnal_tx_done(ktx, err); + if (eq->event_callback != NULL) { + CDEBUG(D_INFO, "found callback\n"); + eq->event_callback(ev); + } + + return(PTL_OK); } -/* - */ - -static int -kgmnal_send(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t len) +void *gmnal_cb_malloc(nal_cb_t 
*nal_cb, size_t len) { - /* - * ipnal assumes that this is the private as passed to lib_dispatch.. - * so do we :/ - */ - kgmnal_tx_t *ktx=NULL; - int rc=0; - void * buf; - int buf_len = sizeof(ptl_hdr_t) + len; - int buf_size = 0; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - PROF_START(gmnal_send); - - - CDEBUG(D_NET, "sending %d bytes from %p to nid: 0x%Lx pid %d\n", - len, iov, nid, KGM_PORT_NUM); - - /* ensure there is an available tx handle */ - - /* save transaction info to trans for later finalize and cleanup */ - ktx = get_trans(); - if (ktx == NULL) { - rc = -ENOMEM; - goto send_exit; - } - - /* hmmm... GM doesn't support vectored write, so need to allocate buffer to coalesce - header and data. - Also, memory must be dma'able or registered with GM. */ - - if (buf_len <= MSG_LEN_SMALL) { - buf_size = MSG_SIZE_SMALL; - } else if (buf_len <= MSG_LEN_LARGE) { - buf_size = MSG_SIZE_LARGE; - } else { - printk("LustreError: kgmnal:request exceeds TX MTU size (%d).\n", - MSG_SIZE_LARGE); - rc = -1; - goto send_exit; - } - - buf = gm_dma_malloc(kgmnal_data.kgm_port, buf_len); - if (buf == NULL) { - rc = -ENOMEM; - goto send_exit; - } - memcpy(buf, hdr, sizeof(ptl_hdr_t)); - - if (len != 0) - lib_copy_iov2buf(((char *)buf) + sizeof (ptl_hdr_t), - options, niov, iov, len); - - ktx->ktx_nal = nal; - ktx->ktx_private = private; - ktx->ktx_cookie = cookie; - ktx->ktx_len = buf_len; - ktx->ktx_size = buf_size; - ktx->ktx_buffer = buf; - ktx->ktx_priority = GM_LOW_PRIORITY; - ktx->ktx_tgt_node = nid; - ktx->ktx_tgt_port_id = KGM_PORT_NUM; - - CDEBUG(D_NET, "gm_send %d bytes (size %d) from %p to nid: 0x%Lx " - "pid %d pri %d\n", buf_len, buf_size, iov, nid, KGM_PORT_NUM, - GM_LOW_PRIORITY); - - gm_send_with_callback(kgmnal_data.kgm_port, buf, buf_size, - buf_len, GM_LOW_PRIORITY, - nid, KGM_PORT_NUM, - kgmnal_txhandler, ktx); - - PROF_FINISH(gmnal_send); - send_exit: - return rc; -} -void -kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - CERROR ("forwarding not implemented\n"); + void *ptr = NULL; + CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len); + PORTAL_ALLOC(ptr, len); + return(ptr); } -void -kqswnal_fwd_callback (void *arg, int error) +void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len) { - CERROR ("forwarding not implemented\n"); + CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len); + PORTAL_FREE(buf, len); + return; } - -static inline void -kgmnal_requeue_rx(kgmnal_rx_t *krx) +void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, + void **addrkey) { - gm_provide_receive_buffer(kgmnal_data.kgm_port, krx->krx_buffer, - krx->krx_size, krx->krx_priority); + return; } -/* Process a received portals packet */ - -/* Receive Interrupt Handler */ -static void kgmnal_rx(kgmnal_data_t *kgm, unsigned long len, unsigned int size, - void * buf, unsigned int pri) +int gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, + void**addrkey) { - ptl_hdr_t *hdr = buf; - kgmnal_rx_t krx; - - CDEBUG(D_NET,"buf %p, len %ld\n", buf, len); - - if ( len < sizeof( ptl_hdr_t ) ) { - /* XXX what's this for? 
*/ - if (kgm->kgm_shuttingdown) - return; - CERROR("kgmnal: did not receive complete portal header, " - "len= %ld", len); - gm_provide_receive_buffer(kgm->kgm_port, buf, size, pri); - return; - } - - /* might want to use seperate threads to handle receive */ - krx.krx_buffer = buf; - krx.krx_len = len; - krx.krx_size = size; - krx.krx_priority = pri; - - if ( hdr->dest_nid == kgmnal_lib.ni.nid ) { - PROF_START(lib_parse); - lib_parse(&kgmnal_lib, (ptl_hdr_t *)krx.krx_buffer, &krx); - PROF_FINISH(lib_parse); - } else if (kgmnal_ispeer(hdr->dest_nid)) { - /* should have gone direct to peer */ - CERROR("dropping packet from 0x%llx to 0x%llx: target is " - "a peer", hdr->src_nid, hdr->dest_nid); - kgmnal_requeue_rx(&krx); - } else { - /* forward to gateway */ - CERROR("forwarding not implemented yet"); - kgmnal_requeue_rx(&krx); - } - - return; + return(PTL_OK); } - -static int kgmnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t mlen, - size_t rlen) +void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...) { - kgmnal_rx_t *krx = private; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - CDEBUG(D_NET,"mlen=%d, rlen=%d\n", mlen, rlen); - - /* What was actually received must be >= what sender claims to - * have sent. This is an LASSERT, since lib-move doesn't - * check cb return code yet. */ - LASSERT (krx->krx_len >= sizeof (ptl_hdr_t) + rlen); - LASSERT (mlen <= rlen); - - PROF_START(gmnal_recv); - - if(mlen != 0) { - PROF_START(memcpy); - lib_copy_buf2iov (options, niov, iov, - krx->krx_buffer + sizeof (ptl_hdr_t), mlen); - PROF_FINISH(memcpy); - } - - PROF_START(lib_finalize); - lib_finalize(nal, private, cookie); - PROF_FINISH(lib_finalize); - - kgmnal_requeue_rx(krx); - - PROF_FINISH(gmnal_recv); - - return rlen; + CDEBUG(D_TRACE, "gmnal_cb_printf\n"); + printk(fmt); + return; } - -static void kgmnal_shutdown(void * none) +void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) { - CERROR("called\n"); - return; + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; + + spin_lock_irqsave(&nal_data->cb_lock, *flags); + return; } -/* - * Set terminate and use alarm to wake up the recv thread. - */ -static void recv_shutdown(kgmnal_data_t *kgm) +void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) { - gm_alarm_t alarm; + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; - kgm->kgm_shuttingdown = 1; - gm_initialize_alarm(&alarm); - gm_set_alarm(kgm->kgm_port, &alarm, 1, kgmnal_shutdown, NULL); + spin_unlock_irqrestore(&nal_data->cb_lock, *flags); + return; } -int kgmnal_end(kgmnal_data_t *kgm) +int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) { + CDEBUG(D_TRACE, "gmnal_cb_dist\n"); + if (dist) + *dist = 27; + return(PTL_OK); +} - /* wait for sends to finish ? 
*/ - /* remove receive buffers */ - /* shutdown receive thread */ - recv_shutdown(kgm); - return 0; -} - -/* Used only for the spinner */ -int kgmnal_recv_thread(void *arg) -{ - kgmnal_data_t *kgm = arg; - - LASSERT(kgm != NULL); - - kportal_daemonize("kgmnal_rx"); - - while(1) { - gm_recv_event_t *e; - int priority = GM_LOW_PRIORITY; - if (kgm->kgm_shuttingdown) - break; - - e = gm_blocking_receive_no_spin(kgm->kgm_port); - if (e == NULL) { - CERROR("gm_blocking_receive returned NULL\n"); - break; - } - - switch(gm_ntohc(e->recv.type)) { - case GM_HIGH_RECV_EVENT: - priority = GM_HIGH_PRIORITY; - /* fall through */ - case GM_RECV_EVENT: - kgmnal_rx(kgm, gm_ntohl(e->recv.length), - gm_ntohc(e->recv.size), - gm_ntohp(e->recv.buffer), priority); - break; - case GM_ALARM_EVENT: - CERROR("received alarm"); - gm_unknown(kgm->kgm_port, e); - break; - case GM_BAD_SEND_DETECTED_EVENT: /* ?? */ - CERROR("received bad send!\n"); - break; - default: - gm_unknown(kgm->kgm_port, e); - } - } - - CERROR("shuttting down.\n"); - return 0; -} -nal_cb_t kgmnal_lib = { - nal_data: &kgmnal_data, /* NAL private data */ - cb_send: kgmnal_send, - cb_recv: kgmnal_recv, - cb_read: kgmnal_read, - cb_write: kgmnal_write, - cb_malloc: kgmnal_malloc, - cb_free: kgmnal_free, - cb_printf: kgmnal_printf, - cb_cli: kgmnal_cli, - cb_sti: kgmnal_sti, - cb_dist: kgmnal_dist -}; +EXPORT_SYMBOL(gmnal_cb_send); +EXPORT_SYMBOL(gmnal_cb_send_pages); +EXPORT_SYMBOL(gmnal_cb_recv); +EXPORT_SYMBOL(gmnal_cb_recv_pages); +EXPORT_SYMBOL(gmnal_cb_read); +EXPORT_SYMBOL(gmnal_cb_write); +EXPORT_SYMBOL(gmnal_cb_cli); +EXPORT_SYMBOL(gmnal_cb_sti); +EXPORT_SYMBOL(gmnal_cb_dist); +EXPORT_SYMBOL(gmnal_cb_printf); +EXPORT_SYMBOL(gmnal_cb_map); +EXPORT_SYMBOL(gmnal_cb_unmap); +EXPORT_SYMBOL(gmnal_cb_callback); +EXPORT_SYMBOL(gmnal_cb_free); +EXPORT_SYMBOL(gmnal_cb_malloc); diff --git a/lnet/klnds/gmlnd/gmlnd_comm.c b/lnet/klnds/gmlnd/gmlnd_comm.c new file mode 100644 index 0000000..9e32145 --- /dev/null +++ b/lnet/klnds/gmlnd/gmlnd_comm.c @@ -0,0 +1,1316 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This file contains all gmnal send and receive functions + */ + +#include "gmnal.h" + +/* + * The caretaker thread + * This is main thread of execution for the NAL side + * This guy waits in gm_blocking_recvive and gets + * woken up when the myrinet adaptor gets an interrupt. + * Hands off receive operations to the receive thread + * This thread Looks after gm_callbacks etc inline. + */ +int +gmnal_ct_thread(void *arg) +{ + gmnal_data_t *nal_data; + gm_recv_event_t *rxevent = NULL; + + if (!arg) { + CDEBUG(D_TRACE, "NO nal_data. 
Exiting\n"); + return(-1); + } + + nal_data = (gmnal_data_t*)arg; + CDEBUG(D_TRACE, "nal_data is [%p]\n", arg); + + daemonize(); + + nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED; + + GMNAL_GM_LOCK(nal_data); + while(nal_data->ctthread_flag == GMNAL_CTTHREAD_STARTED) { + CDEBUG(D_NET, "waiting\n"); + rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); + CDEBUG(D_INFO, "got [%s]\n", gmnal_rxevent(rxevent)); + if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) { + CDEBUG(D_INFO, "time to exit\n"); + break; + } + switch (GM_RECV_EVENT_TYPE(rxevent)) { + + case(GM_RECV_EVENT): + CDEBUG(D_NET, "CTTHREAD:: GM_RECV_EVENT\n"); + GMNAL_GM_UNLOCK(nal_data); + gmnal_add_rxtwe(nal_data, rxevent); + GMNAL_GM_LOCK(nal_data); + CDEBUG(D_NET, "CTTHREAD:: Added event to Q\n"); + break; + case(_GM_SLEEP_EVENT): + /* + * Blocking receive above just returns + * immediatly with _GM_SLEEP_EVENT + * Don't know what this is + */ + CDEBUG(D_NET, "Sleeping in gm_unknown\n"); + GMNAL_GM_UNLOCK(nal_data); + gm_unknown(nal_data->gm_port, rxevent); + GMNAL_GM_LOCK(nal_data); + CDEBUG(D_INFO, "Awake from gm_unknown\n"); + break; + + default: + /* + * Don't know what this is + * gm_unknown will make sense of it + * Should be able to do something with + * FAST_RECV_EVENTS here. + */ + CDEBUG(D_NET, "Passing event to gm_unknown\n"); + GMNAL_GM_UNLOCK(nal_data); + gm_unknown(nal_data->gm_port, rxevent); + GMNAL_GM_LOCK(nal_data); + CDEBUG(D_INFO, "Processed unknown event\n"); + } + } + GMNAL_GM_UNLOCK(nal_data); + nal_data->ctthread_flag = GMNAL_THREAD_RESET; + CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data); + return(GMNAL_STATUS_OK); +} + + +/* + * process a receive event + */ +int gmnal_rx_thread(void *arg) +{ + gmnal_data_t *nal_data; + gm_recv_event_t *rxevent = NULL; + gm_recv_t *recv = NULL; + void *buffer; + gmnal_rxtwe_t *we = NULL; + + if (!arg) { + CDEBUG(D_TRACE, "NO nal_data. 
Exiting\n"); + return(-1); + } + + nal_data = (gmnal_data_t*)arg; + CDEBUG(D_TRACE, "nal_data is [%p]\n", arg); + + daemonize(); + /* + * set 1 bit for each thread started + * doesn't matter which bit + */ + spin_lock(&nal_data->rxthread_flag_lock); + if (nal_data->rxthread_flag) + nal_data->rxthread_flag=nal_data->rxthread_flag*2 + 1; + else + nal_data->rxthread_flag = 1; + CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag); + spin_unlock(&nal_data->rxthread_flag_lock); + + while(nal_data->rxthread_stop_flag != GMNAL_THREAD_STOP) { + CDEBUG(D_NET, "RXTHREAD:: Receive thread waiting\n"); + we = gmnal_get_rxtwe(nal_data); + if (!we) { + CDEBUG(D_INFO, "Receive thread time to exit\n"); + break; + } + rxevent = we->rx; + CDEBUG(D_INFO, "thread got [%s]\n", gmnal_rxevent(rxevent)); + recv = (gm_recv_t*)&(rxevent->recv); + buffer = gm_ntohp(recv->buffer); + PORTAL_FREE(we, sizeof(gmnal_rxtwe_t)); + + switch(((gmnal_msghdr_t*)buffer)->type) { + case(GMNAL_SMALL_MESSAGE): + gmnal_pre_receive(nal_data, recv, + GMNAL_SMALL_MESSAGE); + break; + case(GMNAL_LARGE_MESSAGE_INIT): + gmnal_pre_receive(nal_data, recv, + GMNAL_LARGE_MESSAGE_INIT); + break; + case(GMNAL_LARGE_MESSAGE_ACK): + gmnal_pre_receive(nal_data, recv, + GMNAL_LARGE_MESSAGE_ACK); + break; + default: + CDEBUG(D_ERROR, "Unsupported message type\n"); + gmnal_rx_bad(nal_data, recv, NULL); + } + } + + spin_lock(&nal_data->rxthread_flag_lock); + nal_data->rxthread_flag/=2; + CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag); + spin_unlock(&nal_data->rxthread_flag_lock); + CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data); + return(GMNAL_STATUS_OK); +} + + + +/* + * Start processing a small message receive + * Get here from gmnal_receive_thread + * Hand off to lib_parse, which calls cb_recv + * which hands back to gmnal_small_receive + * Deal with all endian stuff here. 
+ */
+int
+gmnal_pre_receive(gmnal_data_t *nal_data, gm_recv_t *recv, int gmnal_type)
+{
+        gmnal_srxd_t    *srxd = NULL;
+        void            *buffer = NULL;
+        unsigned int    snode, sport, type, length;
+        gmnal_msghdr_t  *gmnal_msghdr;
+        ptl_hdr_t       *portals_hdr;
+
+        CDEBUG(D_INFO, "nal_data [%p], recv [%p] type [%d]\n",
+               nal_data, recv, gmnal_type);
+
+        buffer = gm_ntohp(recv->buffer);
+        snode = (int)gm_ntoh_u16(recv->sender_node_id);
+        sport = (int)gm_ntoh_u8(recv->sender_port_id);
+        type = (int)gm_ntoh_u8(recv->type);
+        length = (int) gm_ntohl(recv->length);
+
+        gmnal_msghdr = (gmnal_msghdr_t*)buffer;
+        portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
+
+        CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d],
+               type [%d], length [%d], buffer [%p]\n",
+               snode, sport, type, length, buffer);
+        CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d],
+               gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
+               gmnal_msghdr->magic, gmnal_msghdr->type);
+        CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"],
+               dest_node ["LPD64"]\n", portals_hdr->src_nid,
+               portals_hdr->dest_nid);
+
+
+        /*
+         * Get a receive descriptor for this message
+         */
+        srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
+        CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
+        if (!srxd) {
+                CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
+                lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+                return(GMNAL_STATUS_FAIL);
+        }
+        srxd->nal_data = nal_data;
+
+        /*
+         * no need to bother portals library with this
+         */
+        if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) {
+                gmnal_large_tx_ack_received(nal_data, srxd);
+                return(GMNAL_STATUS_OK);
+        }
+
+        srxd->type = gmnal_type;
+        srxd->nsiov = gmnal_msghdr->niov;
+        srxd->gm_source_node = gmnal_msghdr->sender_node_id;
+
+        CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n",
+               buffer+GMNAL_MSGHDR_SIZE);
+        /*
+         * control passes to lib, which calls cb_recv
+         * cb_recv is responsible for returning the buffer
+         * for future receive
+         */
+        lib_parse(nal_data->nal_cb, portals_hdr, srxd);
+
+        return(GMNAL_STATUS_OK);
+}
+
+
+
+/*
+ * After a receive has been processed,
+ * hang out the receive buffer again.
+ * This implicitly returns a receive token.
+ */
+int
+gmnal_rx_requeue_buffer(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
+{
+        CDEBUG(D_TRACE, "gmnal_rx_requeue_buffer\n");
+
+        CDEBUG(D_NET, "requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data);
+
+        GMNAL_GM_LOCK(nal_data);
+        gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
+                                           srxd->gmsize, GM_LOW_PRIORITY, 0 );
+        GMNAL_GM_UNLOCK(nal_data);
+
+        return(GMNAL_STATUS_OK);
+}
+
+
+/*
+ * Handle a bad message
+ * A bad message is one we don't expect or can't interpret
+ */
+int
+gmnal_rx_bad(gmnal_data_t *nal_data, gm_recv_t *recv, gmnal_srxd_t *srxd)
+{
+        CDEBUG(D_TRACE, "Can't handle message\n");
+
+        if (!srxd)
+                srxd = gmnal_rxbuffer_to_srxd(nal_data,
+                                              gm_ntohp(recv->buffer));
+        if (srxd) {
+                gmnal_rx_requeue_buffer(nal_data, srxd);
+        } else {
+                CDEBUG(D_ERROR, "Can't find a descriptor for this buffer\n");
+                /*
+                 * get rid of it ?
+                 */
+                return(GMNAL_STATUS_FAIL);
+        }
+
+        return(GMNAL_STATUS_OK);
+}
+
+
+
+/*
+ * Process a small message receive.
+ * Get here from gmnal_receive_thread, gmnal_pre_receive + * lib_parse, cb_recv + * Put data from prewired receive buffer into users buffer(s) + * Hang out the receive buffer again for another receive + * Call lib_finalize + */ +int +gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) +{ + gmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; + + + CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen); + + if (!private) { + CDEBUG(D_ERROR, "gmnal_small_rx no context\n"); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + } + + srxd = (gmnal_srxd_t*)private; + buffer = srxd->buffer; + buffer += sizeof(gmnal_msghdr_t); + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov, + iov->iov_len); + gm_bcopy(buffer, iov->iov_base, iov->iov_len); + buffer += iov->iov_len; + iov++; + } + + + /* + * let portals library know receive is complete + */ + CDEBUG(D_PORTALS, "calling lib_finalize\n"); + if (lib_finalize(nal_cb, private, cookie) != PTL_OK) { + /* TO DO what to do with failed lib_finalise? */ + CDEBUG(D_INFO, "lib_finalize failed\n"); + } + /* + * return buffer so it can be used again + */ + CDEBUG(D_NET, "calling gm_provide_receive_buffer\n"); + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0); + GMNAL_GM_UNLOCK(nal_data); + + return(PTL_OK); +} + + +/* + * Start a small transmit. + * Get a send token (and wired transmit buffer). + * Copy data from senders buffer to wired buffer and + * initiate gm_send from the wired buffer. + * The callback function informs when the send is complete. 
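+ *
+ * In outline (a sketch of the steps the code below performs, not an
+ * addition to them):
+ *
+ *   1. gmnal_get_stxd() blocks for a send token and a wired buffer.
+ *   2. A gmnal_msghdr_t, the ptl_hdr_t and the payload are copied into
+ *      stxd->buffer.
+ *   3. gm_send_to_peer_with_callback() posts the send with
+ *      gmnal_small_tx_callback() as the completion handler.
+ *   4. The callback returns the stxd (and so the send token) and calls
+ *      lib_finalize() to complete the send.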
+ */ +int +gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, int size) +{ + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; + gmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + gmnal_msghdr_t *msghdr = NULL; + int tot_size = 0; + unsigned int local_nid; + gm_status_t gm_status = GM_SUCCESS; + + CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] + hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] + iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, + global_nid, pid, niov, iov, size); + + CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n", + hdr->dest_nid, hdr->src_nid); + + if (!nal_data) { + CDEBUG(D_ERROR, "no nal_data\n"); + return(GMNAL_STATUS_FAIL); + } else { + CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); + } + + GMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, + &local_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "Failed to obtain local id\n"); + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid); + + stxd = gmnal_get_stxd(nal_data, 1); + CDEBUG(D_INFO, "stxd [%p]\n", stxd); + + stxd->type = GMNAL_SMALL_MESSAGE; + stxd->cookie = cookie; + + /* + * Copy gmnal_msg_hdr and portals header to the transmit buffer + * Then copy the data in + */ + buffer = stxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + + msghdr->magic = GMNAL_MAGIC; + msghdr->type = GMNAL_SMALL_MESSAGE; + msghdr->sender_node_id = nal_data->gm_global_nid; + CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); + + buffer += sizeof(gmnal_msghdr_t); + + CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer); + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n", + iov, iov->iov_len, buffer); + gm_bcopy(iov->iov_base, buffer, iov->iov_len); + buffer+= iov->iov_len; + iov++; + } + + CDEBUG(D_INFO, "sending\n"); + tot_size = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t); + stxd->msg_size = tot_size; + + + CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] + gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] + stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, + stxd->msg_size, global_nid, local_nid, stxd); + + GMNAL_GM_LOCK(nal_data); + stxd->gm_priority = GM_LOW_PRIORITY; + stxd->gm_target_node = local_nid; + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, + stxd->gm_size, stxd->msg_size, + GM_LOW_PRIORITY, local_nid, + gmnal_small_tx_callback, (void*)stxd); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "done\n"); + + return(PTL_OK); +} + + +/* + * A callback to indicate the small transmit operation is compete + * Check for erros and try to deal with them. + * Call lib_finalise to inform the client application that the send + * is complete and the memory can be reused. 
+ * Return the stxd when finished with it (returns a send token) + */ +void +gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) +{ + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + lib_msg_t *cookie = stxd->cookie; + gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; + nal_cb_t *nal_cb = nal_data->nal_cb; + + if (!stxd) { + CDEBUG(D_TRACE, "send completion event for unknown stxd\n"); + return; + } + if (status != GM_SUCCESS) { + CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n", + stxd, gmnal_gm_error(status)); + } + + switch(status) { + case(GM_SUCCESS): + break; + + + + case(GM_SEND_DROPPED): + /* + * do a resend on the dropped ones + */ + CDEBUG(D_ERROR, "send stxd [%p] was dropped + resending\n", context); + GMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(nal_data->gm_port, + stxd->buffer, + stxd->gm_size, + stxd->msg_size, + stxd->gm_priority, + stxd->gm_target_node, + gmnal_small_tx_callback, + context); + GMNAL_GM_UNLOCK(nal_data); + + return; + case(GM_TIMED_OUT): + case(GM_SEND_TIMED_OUT): + /* + * drop these ones + */ + CDEBUG(D_INFO, "calling gm_drop_sends\n"); + GMNAL_GM_LOCK(nal_data); + gm_drop_sends(nal_data->gm_port, stxd->gm_priority, + stxd->gm_target_node, GMNAL_GM_PORT, + gmnal_drop_sends_callback, context); + GMNAL_GM_UNLOCK(nal_data); + + return; + + + /* + * abort on these ? + */ + case(GM_TRY_AGAIN): + case(GM_INTERRUPTED): + case(GM_FAILURE): + case(GM_INPUT_BUFFER_TOO_SMALL): + case(GM_OUTPUT_BUFFER_TOO_SMALL): + case(GM_BUSY): + case(GM_MEMORY_FAULT): + case(GM_INVALID_PARAMETER): + case(GM_OUT_OF_MEMORY): + case(GM_INVALID_COMMAND): + case(GM_PERMISSION_DENIED): + case(GM_INTERNAL_ERROR): + case(GM_UNATTACHED): + case(GM_UNSUPPORTED_DEVICE): + case(GM_SEND_REJECTED): + case(GM_SEND_TARGET_PORT_CLOSED): + case(GM_SEND_TARGET_NODE_UNREACHABLE): + case(GM_SEND_PORT_CLOSED): + case(GM_NODE_ID_NOT_YET_SET): + case(GM_STILL_SHUTTING_DOWN): + case(GM_CLONE_BUSY): + case(GM_NO_SUCH_DEVICE): + case(GM_ABORTED): + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + case(GM_UNTRANSLATED_SYSTEM_ERROR): + case(GM_ACCESS_DENIED): + case(GM_NO_DRIVER_SUPPORT): + case(GM_PTE_REF_CNT_OVERFLOW): + case(GM_NOT_SUPPORTED_IN_KERNEL): + case(GM_NOT_SUPPORTED_ON_ARCH): + case(GM_NO_MATCH): + case(GM_USER_ERROR): + case(GM_DATA_CORRUPTED): + case(GM_HARDWARE_FAULT): + case(GM_SEND_ORPHANED): + case(GM_MINOR_OVERFLOW): + case(GM_PAGE_TABLE_FULL): + case(GM_UC_ERROR): + case(GM_INVALID_PORT_NUMBER): + case(GM_DEV_NOT_FOUND): + case(GM_FIRMWARE_NOT_RUNNING): + case(GM_YP_NO_MATCH): + default: + CDEBUG(D_ERROR, "Unknown send error\n"); + } + if (stxd->type == GMNAL_LARGE_MESSAGE_INIT) { + CDEBUG(D_INFO, "large transmit done\n"); + return; + } + gmnal_return_stxd(nal_data, stxd); + if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) { + CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", + stxd); + } + return; +} + + + +void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, + gm_status_t status) +{ + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + gmnal_data_t *nal_data = stxd->nal_data; + + CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context); + if (status == GM_SUCCESS) { + GMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(gm_port, stxd->buffer, + stxd->gm_size, stxd->msg_size, + stxd->gm_priority, + stxd->gm_target_node, + gmnal_small_tx_callback, + context); + GMNAL_GM_LOCK(nal_data); + } else { + CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is + [%d][%s]\n", stxd, status, gmnal_gm_error(status)); + } + 
+ + return; +} + + +/* + * Begine a large transmit. + * Do a gm_register of the memory pointed to by the iovec + * and send details to the receiver. The receiver does a gm_get + * to pull the data and sends and ack when finished. Upon receipt of + * this ack, deregister the memory. Only 1 send token is required here. + */ +int +gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, int size) +{ + + gmnal_data_t *nal_data; + gmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + gmnal_msghdr_t *msghdr = NULL; + unsigned int local_nid; + int mlen = 0; /* the size of the init message data */ + struct iovec *iov_dup = NULL; + gm_status_t gm_status; + int niov_dup; + + + CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] + hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], + iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, + global_nid, pid, niov, iov, size); + + if (nal_cb) + nal_data = (gmnal_data_t*)nal_cb->nal_data; + else { + CDEBUG(D_ERROR, "no nal_cb.\n"); + return(GMNAL_STATUS_FAIL); + } + + + /* + * Get stxd and buffer. Put local address of data in buffer, + * send local addresses to target, + * wait for the target node to suck the data over. + * The stxd is used to ren + */ + stxd = gmnal_get_stxd(nal_data, 1); + CDEBUG(D_INFO, "stxd [%p]\n", stxd); + + stxd->type = GMNAL_LARGE_MESSAGE_INIT; + stxd->cookie = cookie; + + /* + * Copy gmnal_msg_hdr and portals header to the transmit buffer + * Then copy the iov in + */ + buffer = stxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + + CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); + + msghdr->magic = GMNAL_MAGIC; + msghdr->type = GMNAL_LARGE_MESSAGE_INIT; + msghdr->sender_node_id = nal_data->gm_global_nid; + msghdr->stxd = stxd; + msghdr->niov = niov ; + buffer += sizeof(gmnal_msghdr_t); + mlen = sizeof(gmnal_msghdr_t); + CDEBUG(D_INFO, "mlen is [%d]\n", mlen); + + + CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer); + + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + buffer += sizeof(ptl_hdr_t); + mlen += sizeof(ptl_hdr_t); + CDEBUG(D_INFO, "mlen is [%d]\n", mlen); + + /* + * copy the iov to the buffer so target knows + * where to get the data from + */ + CDEBUG(D_INFO, "processing iov to [%p]\n", buffer); + gm_bcopy(iov, buffer, niov*sizeof(struct iovec)); + mlen += niov*(sizeof(struct iovec)); + CDEBUG(D_INFO, "mlen is [%d]\n", mlen); + + + /* + * Store the iovs in the stxd for we can get + * them later if we need them + */ + CDEBUG(D_NET, "Copying iov [%p] to [%p]\n", iov, stxd->iov); + gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec)); + stxd->niov = niov; + + + /* + * register the memory so the NIC can get hold of the data + * This is a slow process. it'd be good to overlap it + * with something else. 
+ */ + iov_dup = iov; + niov_dup = niov; + while(niov--) { + CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", + iov->iov_base, iov->iov_len); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, + iov->iov_base, iov->iov_len); + if (gm_status != GM_SUCCESS) { + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] + for memory [%p] len ["LPSZ"]\n", + gm_status, gmnal_gm_error(gm_status), + iov->iov_base, iov->iov_len); + GMNAL_GM_LOCK(nal_data); + while (iov_dup != iov) { + gm_deregister_memory(nal_data->gm_port, + iov_dup->iov_base, + iov_dup->iov_len); + iov_dup++; + } + GMNAL_GM_UNLOCK(nal_data); + gmnal_return_stxd(nal_data, stxd); + return(PTL_FAIL); + } + + GMNAL_GM_UNLOCK(nal_data); + iov++; + } + + /* + * Send the init message to the target + */ + CDEBUG(D_INFO, "sending mlen [%d]\n", mlen); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, + &local_nid); + if (gm_status != GM_SUCCESS) { + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_ERROR, "Failed to obtain local id\n"); + gmnal_return_stxd(nal_data, stxd); + /* TO DO deregister memory on failure */ + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_INFO, "Local Node_id is [%d]\n", local_nid); + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, + stxd->gm_size, mlen, GM_LOW_PRIORITY, + local_nid, gmnal_large_tx_callback, + (void*)stxd); + GMNAL_GM_UNLOCK(nal_data); + + CDEBUG(D_INFO, "done\n"); + + return(PTL_OK); +} + +/* + * Callback function indicates that send of buffer with + * large message iovec has completed (or failed). + */ +void +gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) +{ + gmnal_small_tx_callback(gm_port, context, status); + +} + + + +/* + * Have received a buffer that contains an iovec of the sender. + * Do a gm_register_memory of the receivers buffer and then do a get + * data from the sender. 
+ */ +int +gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int nriov, struct iovec *riov, size_t mlen, + size_t rlen) +{ + gmnal_data_t *nal_data = nal_cb->nal_data; + gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; + void *buffer = NULL; + struct iovec *riov_dup; + int nriov_dup; + gmnal_msghdr_t *msghdr = NULL; + gm_status_t gm_status; + + CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], + cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n", + nal_cb, private, cookie, nriov, riov, mlen, rlen); + + if (!srxd) { + CDEBUG(D_ERROR, "gmnal_large_rx no context\n"); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + } + + buffer = srxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + buffer += sizeof(gmnal_msghdr_t); + buffer += sizeof(ptl_hdr_t); + + /* + * Store the senders stxd address in the srxd for this message + * The gmnal_large_message_ack needs it to notify the sender + * the pull of data is complete + */ + srxd->source_stxd = msghdr->stxd; + + /* + * Register the receivers memory + * get the data, + * tell the sender that we got the data + * then tell the receiver we got the data + */ + nriov_dup = nriov; + riov_dup = riov; + while(nriov--) { + CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", + riov->iov_base, riov->iov_len); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, + riov->iov_base, riov->iov_len); + if (gm_status != GM_SUCCESS) { + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] + for memory [%p] len ["LPSZ"]\n", + gm_status, gmnal_gm_error(gm_status), + riov->iov_base, riov->iov_len); + GMNAL_GM_LOCK(nal_data); + while (riov_dup != riov) { + gm_deregister_memory(nal_data->gm_port, + riov_dup->iov_base, + riov_dup->iov_len); + riov_dup++; + } + GMNAL_GM_LOCK(nal_data); + /* + * give back srxd and buffer. Send NACK to sender + */ + return(PTL_FAIL); + } + GMNAL_GM_UNLOCK(nal_data); + riov++; + } + /* + * do this so the final gm_get callback can deregister the memory + */ + PORTAL_ALLOC(srxd->riov, nriov_dup*(sizeof(struct iovec))); + gm_bcopy(riov_dup, srxd->riov, nriov_dup*(sizeof(struct iovec))); + srxd->nriov = nriov_dup; + + /* + * now do gm_get to get the data + */ + srxd->cookie = cookie; + if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer, + nriov_dup, riov_dup) != GMNAL_STATUS_OK) { + CDEBUG(D_ERROR, "can't get the data"); + } + + CDEBUG(D_INFO, "lgmanl_large_rx done\n"); + + return(PTL_OK); +} + + +/* + * Perform a number of remote gets as part of receiving + * a large message. + * The final one to complete (i.e. the last callback to get called) + * tidies up. + * gm_get requires a send token. 
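+ *
+ * gmnal_copyiov() below is run twice: a counting pass (do_copy == 0)
+ * works out how many gm_get()s the iovec split will need so that
+ * srxd->ncallbacks can be primed before any callback can fire, and a
+ * second pass (do_copy == 1) actually issues the gm_get()s.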
+ */ +int +gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov, + int nriov, struct iovec *riov) +{ + + int ncalls = 0; + + CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], + nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov); + + + ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov); + if (ncalls < 0) { + CDEBUG(D_ERROR, "there's something wrong with the iovecs\n"); + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_INFO, "gmnal_remote_get ncalls [%d]\n", ncalls); + spin_lock_init(&srxd->callback_lock); + srxd->ncallbacks = ncalls; + srxd->callback_status = 0; + + ncalls = gmnal_copyiov(1, srxd, nsiov, siov, nriov, riov); + if (ncalls < 0) { + CDEBUG(D_ERROR, "there's something wrong with the iovecs\n"); + return(GMNAL_STATUS_FAIL); + } + + return(GMNAL_STATUS_OK); + +} + + +/* + * pull data from source node (source iovec) to a local iovec. + * The iovecs may not match which adds the complications below. + * Count the number of gm_gets that will be required to the callbacks + * can determine who is the last one. + */ +int +gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov, + struct iovec *siov, int nriov, struct iovec *riov) +{ + + int ncalls = 0; + int slen = siov->iov_len, rlen = riov->iov_len; + char *sbuf = siov->iov_base, *rbuf = riov->iov_base; + unsigned long sbuf_long; + gm_remote_ptr_t remote_ptr = 0; + unsigned int source_node; + gmnal_stxd_t *stxd = NULL; + gmnal_data_t *nal_data = srxd->nal_data; + + CDEBUG(D_TRACE, "copy[%d] nal_data[%p]\n", do_copy, nal_data); + if (do_copy) { + if (!nal_data) { + CDEBUG(D_ERROR, "Bad args No nal_data\n"); + return(GMNAL_STATUS_FAIL); + } + GMNAL_GM_LOCK(nal_data); + if (gm_global_id_to_node_id(nal_data->gm_port, + srxd->gm_source_node, + &source_node) != GM_SUCCESS) { + + CDEBUG(D_ERROR, "cannot resolve global_id [%u] + to local node_id\n", srxd->gm_source_node); + GMNAL_GM_UNLOCK(nal_data); + return(GMNAL_STATUS_FAIL); + } + GMNAL_GM_UNLOCK(nal_data); + /* + * We need a send token to use gm_get + * getting an stxd gets us a send token. + * the stxd is used as the context to the + * callback function (so stxd can be returned). 
+                 * Set pointer in stxd to srxd so callback count in srxd
+                 * can be decremented to find last callback to complete
+                 */
+                stxd = gmnal_get_stxd(nal_data, 1);
+                stxd->srxd = srxd;
+                CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n",
+                       srxd->gm_source_node, source_node);
+        }
+
+        do {
+                CDEBUG(D_INFO, "sbuf[%p] slen[%d] rbuf[%p], rlen[%d]\n",
+                       sbuf, slen, rbuf, rlen);
+                if (slen > rlen) {
+                        ncalls++;
+                        if (do_copy) {
+                                CDEBUG(D_INFO, "slen>rlen\n");
+                                GMNAL_GM_LOCK(nal_data);
+                                /*
+                                 * funny business to get rid
+                                 * of compiler warning
+                                 */
+                                sbuf_long = (unsigned long) sbuf;
+                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
+                                gm_get(nal_data->gm_port, remote_ptr, rbuf,
+                                       rlen, GM_LOW_PRIORITY, source_node,
+                                       GMNAL_GM_PORT,
+                                       gmnal_remote_get_callback, stxd);
+                                GMNAL_GM_UNLOCK(nal_data);
+                        }
+                        /*
+                         * at the end of 1 iov element
+                         */
+                        sbuf+=rlen;
+                        slen-=rlen;
+                        riov++;
+                        nriov--;
+                        rbuf = riov->iov_base;
+                        rlen = riov->iov_len;
+                } else if (rlen > slen) {
+                        ncalls++;
+                        if (do_copy) {
+                                CDEBUG(D_INFO, "slen<rlen\n");
+                                GMNAL_GM_LOCK(nal_data);
+                                sbuf_long = (unsigned long) sbuf;
+                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
+                                gm_get(nal_data->gm_port, remote_ptr, rbuf,
+                                       slen, GM_LOW_PRIORITY, source_node,
+                                       GMNAL_GM_PORT,
+                                       gmnal_remote_get_callback, stxd);
+                                GMNAL_GM_UNLOCK(nal_data);
+                        }
+                        /*
+                         * at end of siov element
+                         */
+                        rbuf+=slen;
+                        rlen-=slen;
+                        siov++;
+                        sbuf = siov->iov_base;
+                        slen = siov->iov_len;
+                } else {
+                        ncalls++;
+                        if (do_copy) {
+                                CDEBUG(D_INFO, "rlen=slen\n");
+                                GMNAL_GM_LOCK(nal_data);
+                                sbuf_long = (unsigned long) sbuf;
+                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
+                                gm_get(nal_data->gm_port, remote_ptr, rbuf,
+                                       rlen, GM_LOW_PRIORITY, source_node,
+                                       GMNAL_GM_PORT,
+                                       gmnal_remote_get_callback, stxd);
+                                GMNAL_GM_UNLOCK(nal_data);
+                        }
+                        /*
+                         * at end of siov and riov element
+                         */
+                        siov++;
+                        sbuf = siov->iov_base;
+                        slen = siov->iov_len;
+                        riov++;
+                        nriov--;
+                        rbuf = riov->iov_base;
+                        rlen = riov->iov_len;
+                }
+
+        } while (nriov);
+        return(ncalls);
+}
+
+
+/*
+ * The callback function that is invoked after each gm_get call completes.
+ * Multiple callbacks may be invoked for 1 transaction, only the final
+ * callback has work to do.
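+ *
+ * Worked example with made-up sizes: if the sender advertised two
+ * 4096-byte iovec entries and the receiver supplied entries of 2048,
+ * 2048 and 4096 bytes, gmnal_copyiov() issues three gm_get()s (2048 and
+ * 2048 from the first source entry, then 4096 from the second), so
+ * srxd->ncallbacks starts at 3 and the third callback to fire does the
+ * lib_finalize(), the ack and the deregistration below.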
+ */ +void +gmnal_remote_get_callback(gm_port_t *gm_port, void *context, + gm_status_t status) +{ + + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + gmnal_srxd_t *srxd = stxd->srxd; + nal_cb_t *nal_cb = srxd->nal_data->nal_cb; + int lastone; + struct iovec *riov; + int nriov; + gmnal_data_t *nal_data; + + CDEBUG(D_TRACE, "called for context [%p]\n", context); + + if (status != GM_SUCCESS) { + CDEBUG(D_ERROR, "reports error [%d][%s]\n", status, + gmnal_gm_error(status)); + } + + spin_lock(&srxd->callback_lock); + srxd->ncallbacks--; + srxd->callback_status |= status; + lastone = srxd->ncallbacks?0:1; + spin_unlock(&srxd->callback_lock); + nal_data = srxd->nal_data; + + /* + * everyone returns a send token + */ + gmnal_return_stxd(nal_data, stxd); + + if (!lastone) { + CDEBUG(D_ERROR, "NOT final callback context[%p]\n", srxd); + return; + } + + /* + * Let our client application proceed + */ + CDEBUG(D_ERROR, "final callback context[%p]\n", srxd); + if (lib_finalize(nal_cb, srxd, srxd->cookie) != PTL_OK) { + CDEBUG(D_INFO, "Call to lib_finalize failed for srxd [%p]\n", + srxd); + } + + /* + * send an ack to the sender to let him know we got the data + */ + gmnal_large_tx_ack(nal_data, srxd); + + /* + * Unregister the memory that was used + * This is a very slow business (slower then register) + */ + nriov = srxd->nriov; + riov = srxd->riov; + GMNAL_GM_LOCK(nal_data); + while (nriov--) { + CDEBUG(D_ERROR, "deregister memory [%p]\n", riov->iov_base); + if (gm_deregister_memory(srxd->nal_data->gm_port, + riov->iov_base, riov->iov_len)) { + CDEBUG(D_ERROR, "failed to deregister memory [%p]\n", + riov->iov_base); + } + riov++; + } + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(srxd->riov, sizeof(struct iovec)*nriov); + + /* + * repost the receive buffer (return receive token) + */ + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0); + GMNAL_GM_UNLOCK(nal_data); + + return; +} + + +/* + * Called on target node. + * After pulling data from a source node + * send an ack message to indicate the large transmit is complete. + */ +void +gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) +{ + + gmnal_stxd_t *stxd; + gmnal_msghdr_t *msghdr; + void *buffer = NULL; + unsigned int local_nid; + gm_status_t gm_status = GM_SUCCESS; + + CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd, + srxd->gm_source_node); + + GMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, + srxd->gm_source_node, &local_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "Failed to obtain local id\n"); + return; + } + CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid); + + stxd = gmnal_get_stxd(nal_data, 1); + CDEBUG(D_TRACE, "gmnal_large_tx_ack got stxd[%p]\n", stxd); + + stxd->nal_data = nal_data; + stxd->type = GMNAL_LARGE_MESSAGE_ACK; + + /* + * Copy gmnal_msg_hdr and portals header to the transmit buffer + * Then copy the data in + */ + buffer = stxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + + /* + * Add in the address of the original stxd from the sender node + * so it knows which thread to notify. 
+ */ + msghdr->magic = GMNAL_MAGIC; + msghdr->type = GMNAL_LARGE_MESSAGE_ACK; + msghdr->sender_node_id = nal_data->gm_global_nid; + msghdr->stxd = srxd->source_stxd; + CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); + + CDEBUG(D_INFO, "sending\n"); + stxd->msg_size= sizeof(gmnal_msghdr_t); + + + CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] + gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] + stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, + stxd->msg_size, srxd->gm_source_node, local_nid, stxd); + GMNAL_GM_LOCK(nal_data); + stxd->gm_priority = GM_LOW_PRIORITY; + stxd->gm_target_node = local_nid; + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, + stxd->gm_size, stxd->msg_size, + GM_LOW_PRIORITY, local_nid, + gmnal_large_tx_ack_callback, + (void*)stxd); + + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "gmnal_large_tx_ack :: done\n"); + + return; +} + + +/* + * A callback to indicate the small transmit operation is compete + * Check for errors and try to deal with them. + * Call lib_finalise to inform the client application that the + * send is complete and the memory can be reused. + * Return the stxd when finished with it (returns a send token) + */ +void +gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context, + gm_status_t status) +{ + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; + + if (!stxd) { + CDEBUG(D_ERROR, "send completion event for unknown stxd\n"); + return; + } + CDEBUG(D_TRACE, "send completion event for stxd [%p] status is [%d]\n", + stxd, status); + gmnal_return_stxd(stxd->nal_data, stxd); + + GMNAL_GM_UNLOCK(nal_data); + return; +} + +/* + * Indicates the large transmit operation is compete. + * Called on transmit side (means data has been pulled by receiver + * or failed). + * Call lib_finalise to inform the client application that the send + * is complete, deregister the memory and return the stxd. + * Finally, report the rx buffer that the ack message was delivered in. + */ +void +gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) +{ + nal_cb_t *nal_cb = nal_data->nal_cb; + gmnal_stxd_t *stxd = NULL; + gmnal_msghdr_t *msghdr = NULL; + void *buffer = NULL; + struct iovec *iov; + + + CDEBUG(D_TRACE, "gmnal_large_tx_ack_received buffer [%p]\n", buffer); + + buffer = srxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + stxd = msghdr->stxd; + + CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd); + + if (lib_finalize(nal_cb, stxd, stxd->cookie) != PTL_OK) { + CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", + stxd); + } + + /* + * extract the iovec from the stxd, deregister the memory. + * free the space used to store the iovec + */ + iov = stxd->iov; + while(stxd->niov--) { + CDEBUG(D_INFO, "deregister memory [%p] size ["LPSZ"]\n", + iov->iov_base, iov->iov_len); + GMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, iov->iov_base, + iov->iov_len); + GMNAL_GM_UNLOCK(nal_data); + iov++; + } + + /* + * return the send token + * TO DO It is bad to hold onto the send token so long? 
+ */ + gmnal_return_stxd(nal_data, stxd); + + + /* + * requeue the receive buffer + */ + gmnal_rx_requeue_buffer(nal_data, srxd); + + + return; +} + + + + +EXPORT_SYMBOL(gmnal_rx_thread); +EXPORT_SYMBOL(gmnal_ct_thread); +EXPORT_SYMBOL(gmnal_pre_receive); +EXPORT_SYMBOL(gmnal_rx_requeue_buffer); +EXPORT_SYMBOL(gmnal_rx_bad); +EXPORT_SYMBOL(gmnal_small_rx); +EXPORT_SYMBOL(gmnal_large_tx); +EXPORT_SYMBOL(gmnal_large_tx_callback); +EXPORT_SYMBOL(gmnal_small_tx_callback); diff --git a/lnet/klnds/gmlnd/gmlnd_module.c b/lnet/klnds/gmlnd/gmlnd_module.c new file mode 100644 index 0000000..8e0f64c --- /dev/null +++ b/lnet/klnds/gmlnd/gmlnd_module.c @@ -0,0 +1,147 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "gmnal.h" + + +int gmnal_small_msg_size = 525312; +/* + * -1 indicates default value. + * This is 1 thread per cpu + * See start_kernel_threads + */ +int num_rx_threads = -1; +int num_stxds = 5; + +ptl_handle_ni_t kgmnal_ni; + + +int +gmnal_cmd(struct portal_ioctl_data *data, void *private) +{ + gmnal_data_t *nal_data = NULL; + char *name = NULL; + int nid = -2; + int gnid; + gm_status_t gm_status; + + + CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n", + data->ioc_nal_cmd, private); + nal_data = (gmnal_data_t*)private; + switch(data->ioc_nal_cmd) { + /* + * just reuse already defined GET_NID. 
Should define GMNAL version + */ + case(GMNAL_IOC_GET_GNID): + + PORTAL_ALLOC(name, data->ioc_plen1); + copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1); + + GMNAL_GM_LOCK(nal_data); + nid = gm_host_name_to_node_id(nal_data->gm_port, name); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "Local node id is [%d]\n", nid); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, + nid, &gnid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n", + gm_status); + return(-1); + } + CDEBUG(D_INFO, "Global node is is [%u][%x]\n", gnid, gnid); + copy_to_user(data->ioc_pbuf2, &gnid, data->ioc_plen2); + break; + default: + CDEBUG(D_INFO, "gmnal_cmd UNKNOWN[%d]\n", data->ioc_nal_cmd); + data->ioc_nid2 = -1; + } + + + return(0); +} + + +static int __init +gmnal_load(void) +{ + int status; + CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n"); + + + + CDEBUG(D_INFO, "Calling gmnal_init\n"); + status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni); + if (status == PTL_OK) { + CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n"); + } else { + CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n"); + return(1); + + } + + CDEBUG(D_INFO, "Calling kportal_nal_register\n"); + /* + * global_nal_data is set by gmnal_init + */ + if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) { + CDEBUG(D_INFO, "kportal_nal_register failed\n"); + return(1); + } + + CDEBUG(D_INFO, "Calling PORTAL_SYMBOL_REGISTER\n"); + PORTAL_SYMBOL_REGISTER(kgmnal_ni); + CDEBUG(D_INFO, "This is the end of the gmnal init routine"); + + + return(0); +} + + +static void __exit +gmnal_unload(void) +{ + + kportal_nal_unregister(GMNAL); + PORTAL_SYMBOL_UNREGISTER(kgmnal_ni); + gmnal_fini(); + global_nal_data = NULL; + return; +} + + +module_init(gmnal_load); + +module_exit(gmnal_unload); + +EXPORT_SYMBOL(kgmnal_ni); + +MODULE_PARM(gmnal_small_msg_size, "i"); +MODULE_PARM(num_rx_threads, "i"); +MODULE_PARM(num_stxds, "i"); + +MODULE_AUTHOR("Morgan Doyle"); + +MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM."); + +MODULE_LICENSE("GPL"); diff --git a/lnet/klnds/gmlnd/gmlnd_utils.c b/lnet/klnds/gmlnd/gmlnd_utils.c new file mode 100644 index 0000000..84fc3a0 --- /dev/null +++ b/lnet/klnds/gmlnd/gmlnd_utils.c @@ -0,0 +1,1007 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * All utilities required by lgmanl + */ + +#include "gmnal.h" + +/* + * Am I one of the gmnal rxthreads ? 
+ */
+int
+gmnal_is_rxthread(gmnal_data_t *nal_data)
+{
+        int i;
+        for (i=0; i<num_rx_threads; i++) {
+                if (nal_data->rxthread_pid[i] == current->pid)
+                        return(1);
+        }
+        return(0);
+}
+
+
+/*
+ * allocate a number of small tx buffers and register with GM
+ * so they are wired and set up for DMA. This is a costly operation.
+ * Also allocate a corresponding descriptor to keep track of
+ * the buffer.
+ * Put all descriptors on singly linked list to be available to send
+ * function.
+ */
+int
+gmnal_alloc_stxd(gmnal_data_t *nal_data)
+{
+        int ntx = 0, nstx = 0, i = 0, nrxt_stx = 10;
+        gmnal_stxd_t    *txd = NULL;
+        void    *txbuffer = NULL;
+
+        CDEBUG(D_TRACE, "gmnal_alloc_small tx\n");
+
+        GMNAL_GM_LOCK(nal_data);
+        ntx = gm_num_send_tokens(nal_data->gm_port);
+        GMNAL_GM_UNLOCK(nal_data);
+        CDEBUG(D_INFO, "total number of send tokens available is [%d]\n", ntx);
+
+        nstx = ntx/2;
+        /*
+         * num_stxds from gmnal_module.c
+         */
+        nstx = num_stxds;
+        nrxt_stx = nstx + 1;
+
+        CDEBUG(D_INFO, "Allocated [%d] send tokens to small messages\n", nstx);
+
+
+        /*
+         * A semaphore is initialised with the
+         * number of transmit tokens available.
+         * To get a stxd, acquire the token semaphore.
+         * this decrements the available token count
+         * (if no tokens you block here, someone returning a
+         * stxd will release the semaphore and wake you)
+         * When token is obtained acquire the spinlock
+         * to manipulate the list
+         */
+        GMNAL_TXD_TOKEN_INIT(nal_data, nstx);
+        GMNAL_TXD_LOCK_INIT(nal_data);
+        GMNAL_RXT_TXD_TOKEN_INIT(nal_data, nrxt_stx);
+        GMNAL_RXT_TXD_LOCK_INIT(nal_data);
+
+        for (i=0; i<=nstx; i++) {
+                PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
+                if (!txd) {
+                        CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
+                        return(GMNAL_STATUS_NOMEM);
+                }
+                GMNAL_GM_LOCK(nal_data);
+                txbuffer = gm_dma_malloc(nal_data->gm_port,
+                                         GMNAL_SMALL_MSG_SIZE(nal_data));
+                GMNAL_GM_UNLOCK(nal_data);
+                if (!txbuffer) {
+                        CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
+                               size [%d]\n", i,
+                               GMNAL_SMALL_MSG_SIZE(nal_data));
+                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
+                        return(GMNAL_STATUS_FAIL);
+                }
+                txd->buffer = txbuffer;
+                txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data);
+                txd->gm_size = gm_min_size_for_length(txd->buffer_size);
+                txd->nal_data = (struct _gmnal_data_t*)nal_data;
+                txd->rxt = 0;
+
+                txd->next = nal_data->stxd;
+                nal_data->stxd = txd;
+                CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p],
+                       size [%d]\n", txd, txd->buffer, txd->buffer_size);
+        }
+
+        for (i=0; i<=nrxt_stx; i++) {
+                PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
+                if (!txd) {
+                        CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
+                        return(GMNAL_STATUS_NOMEM);
+                }
+                GMNAL_GM_LOCK(nal_data);
+                txbuffer = gm_dma_malloc(nal_data->gm_port,
+                                         GMNAL_SMALL_MSG_SIZE(nal_data));
+                GMNAL_GM_UNLOCK(nal_data);
+                if (!txbuffer) {
+                        CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],
+                               size [%d]\n", i,
+                               GMNAL_SMALL_MSG_SIZE(nal_data));
+                        PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
+                        return(GMNAL_STATUS_FAIL);
+                }
+                txd->buffer = txbuffer;
+                txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data);
+                txd->gm_size = gm_min_size_for_length(txd->buffer_size);
+                txd->nal_data = (struct _gmnal_data_t*)nal_data;
+                txd->rxt = 1;
+
+                txd->next = nal_data->rxt_stxd;
+                nal_data->rxt_stxd = txd;
+                CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p],
+                       size [%d]\n", txd, txd->buffer, txd->buffer_size);
+        }
+
+        return(GMNAL_STATUS_OK);
+}
+
+/* Free the list of wired and gm_registered small tx buffers and
+ * the tx descriptors that go along with them.
+ */ +void +gmnal_free_stxd(gmnal_data_t *nal_data) +{ + gmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; + + CDEBUG(D_TRACE, "gmnal_free_small tx\n"); + + while(txd) { + CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], + size [%d]\n", txd, txd->buffer, txd->buffer_size); + _txd = txd; + txd = txd->next; + GMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _txd->buffer); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_txd, sizeof(gmnal_stxd_t)); + } + txd = nal_data->rxt_stxd; + while(txd) { + CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], + size [%d]\n", txd, txd->buffer, txd->buffer_size); + _txd = txd; + txd = txd->next; + GMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _txd->buffer); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_txd, sizeof(gmnal_stxd_t)); + } + return; +} + + +/* + * Get a txd from the list + * This get us a wired and gm_registered small tx buffer. + * This implicitly gets us a send token also. + */ +gmnal_stxd_t * +gmnal_get_stxd(gmnal_data_t *nal_data, int block) +{ + + gmnal_stxd_t *txd = NULL; + pid_t pid = current->pid; + + + CDEBUG(D_TRACE, "gmnal_get_stxd nal_data [%p] block[%d] pid [%d]\n", + nal_data, block, pid); + + if (gmnal_is_rxthread(nal_data)) { + CDEBUG(D_INFO, "RXTHREAD Attempting to get token\n"); + GMNAL_RXT_TXD_GETTOKEN(nal_data); + GMNAL_RXT_TXD_LOCK(nal_data); + txd = nal_data->rxt_stxd; + if (txd) + nal_data->rxt_stxd = txd->next; + GMNAL_RXT_TXD_UNLOCK(nal_data); + CDEBUG(D_INFO, "RXTHREAD got [%p], head is [%p]\n", + txd, nal_data->rxt_stxd); + txd->kniov = 0; + txd->rxt = 1; + } else { + if (block) { + CDEBUG(D_INFO, "Attempting to get token\n"); + GMNAL_TXD_GETTOKEN(nal_data); + CDEBUG(D_PORTALS, "Got token\n"); + } else { + if (GMNAL_TXD_TRYGETTOKEN(nal_data)) { + CDEBUG(D_ERROR, "can't get token\n"); + return(NULL); + } + } + GMNAL_TXD_LOCK(nal_data); + txd = nal_data->stxd; + if (txd) + nal_data->stxd = txd->next; + GMNAL_TXD_UNLOCK(nal_data); + CDEBUG(D_INFO, "got [%p], head is [%p]\n", txd, + nal_data->stxd); + txd->kniov = 0; + } /* general txd get */ + return(txd); +} + +/* + * Return a txd to the list + */ +void +gmnal_return_stxd(gmnal_data_t *nal_data, gmnal_stxd_t *txd) +{ + CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data, + txd, txd->rxt); + + /* + * this transmit descriptor is + * for the rxthread + */ + if (txd->rxt) { + GMNAL_RXT_TXD_LOCK(nal_data); + txd->next = nal_data->rxt_stxd; + nal_data->rxt_stxd = txd; + GMNAL_RXT_TXD_UNLOCK(nal_data); + GMNAL_RXT_TXD_RETURNTOKEN(nal_data); + CDEBUG(D_INFO, "Returned stxd to rxthread list\n"); + } else { + GMNAL_TXD_LOCK(nal_data); + txd->next = nal_data->stxd; + nal_data->stxd = txd; + GMNAL_TXD_UNLOCK(nal_data); + GMNAL_TXD_RETURNTOKEN(nal_data); + CDEBUG(D_INFO, "Returned stxd to general list\n"); + } + return; +} + + +/* + * allocate a number of small rx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to + * receive thread. 
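+ *
+ * Each buffer is also entered in a gm hash table (srxd_hash), keyed on
+ * the buffer address, which is how gmnal_rxbuffer_to_srxd() later maps
+ * the raw buffer pointer in a receive event back to its descriptor.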
+ */ +int +gmnal_alloc_srxd(gmnal_data_t *nal_data) +{ + int nrx = 0, nsrx = 0, i = 0; + gmnal_srxd_t *rxd = NULL; + void *rxbuffer = NULL; + + CDEBUG(D_TRACE, "gmnal_alloc_small rx\n"); + + GMNAL_GM_LOCK(nal_data); + nrx = gm_num_receive_tokens(nal_data->gm_port); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n", + nrx); + + nsrx = nrx/2; + nsrx = 12; + /* + * make the number of rxds twice our total + * number of stxds plus 1 + */ + nsrx = num_stxds*2 + 2; + + CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n", + nsrx); + + + GMNAL_GM_LOCK(nal_data); + nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, + gm_hash_hash_ptr, 0, 0, nsrx, 0); + GMNAL_GM_UNLOCK(nal_data); + if (!nal_data->srxd_hash) { + CDEBUG(D_ERROR, "Failed to create hash table\n"); + return(GMNAL_STATUS_NOMEM); + } + + GMNAL_RXD_TOKEN_INIT(nal_data, nsrx); + GMNAL_RXD_LOCK_INIT(nal_data); + + for (i=0; i<=nsrx; i++) { + PORTAL_ALLOC(rxd, sizeof(gmnal_srxd_t)); + if (!rxd) { + CDEBUG(D_ERROR, "Failed to malloc rxd [%d]\n", i); + return(GMNAL_STATUS_NOMEM); + } +#if 0 + PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data)); + if (!rxbuffer) { + CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], + size [%d]\n", i, + GMNAL_SMALL_MSG_SIZE(nal_data)); + PORTAL_FREE(rxd, sizeof(gmnal_srxd_t)); + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_NET, "Calling gm_register_memory with port [%p] + rxbuffer [%p], size [%d]\n", nal_data->gm_port, + rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, + GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p], + index [%d]\n", rxbuffer, i); + switch(gm_status) { + case(GM_FAILURE): + CDEBUG(D_ERROR, "GM_FAILURE\n"); + break; + case(GM_PERMISSION_DENIED): + CDEBUG(D_ERROR, "PERMISSION_DENIED\n"); + break; + case(GM_INVALID_PARAMETER): + CDEBUG(D_ERROR, "INVALID_PARAMETER\n"); + break; + default: + CDEBUG(D_ERROR, "Unknown error[%d]\n", + gm_status); + break; + + } + return(GMNAL_STATUS_FAIL); + } +#else + GMNAL_GM_LOCK(nal_data); + rxbuffer = gm_dma_malloc(nal_data->gm_port, + GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_UNLOCK(nal_data); + if (!rxbuffer) { + CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d], + size [%d]\n", i, + GMNAL_SMALL_MSG_SIZE(nal_data)); + PORTAL_FREE(rxd, sizeof(gmnal_srxd_t)); + return(GMNAL_STATUS_FAIL); + } +#endif + + rxd->buffer = rxbuffer; + rxd->size = GMNAL_SMALL_MSG_SIZE(nal_data); + rxd->gmsize = gm_min_size_for_length(rxd->size); + + if (gm_hash_insert(nal_data->srxd_hash, + (void*)rxbuffer, (void*)rxd)) { + + CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] + for rxbuffer[%p]\n", rxd, rxbuffer); + return(GMNAL_STATUS_FAIL); + } + + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], + size [%d]\n", rxd, rxd->buffer, rxd->size); + } + + return(GMNAL_STATUS_OK); +} + + + +/* Free the list of wired and gm_registered small rx buffers and the + * rx descriptors that go along with them. 
+ */ +void +gmnal_free_srxd(gmnal_data_t *nal_data) +{ + gmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL; + + CDEBUG(D_TRACE, "gmnal_free_small rx\n"); + + while(rxd) { + CDEBUG(D_INFO, "Freeing rxd [%p] buffer [%p], size [%d]\n", + rxd, rxd->buffer, rxd->size); + _rxd = rxd; + rxd = rxd->next; + +#if 0 + GMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, _rxd->buffer, + _rxd->size); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_rxd->buffer, GMNAL_SMALL_RXBUFFER_SIZE); +#else + GMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _rxd->buffer); + GMNAL_GM_UNLOCK(nal_data); +#endif + PORTAL_FREE(_rxd, sizeof(gmnal_srxd_t)); + } + return; +} + + +/* + * Get a rxd from the free list + * This get us a wired and gm_registered small rx buffer. + * This implicitly gets us a receive token also. + */ +gmnal_srxd_t * +gmnal_get_srxd(gmnal_data_t *nal_data, int block) +{ + + gmnal_srxd_t *rxd = NULL; + CDEBUG(D_TRACE, "nal_data [%p] block [%d]\n", nal_data, block); + + if (block) { + GMNAL_RXD_GETTOKEN(nal_data); + } else { + if (GMNAL_RXD_TRYGETTOKEN(nal_data)) { + CDEBUG(D_INFO, "gmnal_get_srxd Can't get token\n"); + return(NULL); + } + } + GMNAL_RXD_LOCK(nal_data); + rxd = nal_data->srxd; + if (rxd) + nal_data->srxd = rxd->next; + GMNAL_RXD_UNLOCK(nal_data); + CDEBUG(D_INFO, "got [%p], head is [%p]\n", rxd, nal_data->srxd); + return(rxd); +} + +/* + * Return an rxd to the list + */ +void +gmnal_return_srxd(gmnal_data_t *nal_data, gmnal_srxd_t *rxd) +{ + CDEBUG(D_TRACE, "nal_data [%p], rxd[%p]\n", nal_data, rxd); + + GMNAL_RXD_LOCK(nal_data); + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + GMNAL_RXD_UNLOCK(nal_data); + GMNAL_RXD_RETURNTOKEN(nal_data); + return; +} + +/* + * Given a pointer to a srxd find + * the relevant descriptor for it + * This is done by searching a hash + * list that is created when the srxd's + * are created + */ +gmnal_srxd_t * +gmnal_rxbuffer_to_srxd(gmnal_data_t *nal_data, void *rxbuffer) +{ + gmnal_srxd_t *srxd = NULL; + CDEBUG(D_TRACE, "nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer); + srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer); + CDEBUG(D_INFO, "srxd is [%p]\n", srxd); + return(srxd); +} + + +void +gmnal_stop_rxthread(gmnal_data_t *nal_data) +{ + int delay = 30; + + + + CDEBUG(D_TRACE, "Attempting to stop rxthread nal_data [%p]\n", + nal_data); + + nal_data->rxthread_stop_flag = GMNAL_THREAD_STOP; + + gmnal_remove_rxtwe(nal_data); + /* + * kick the thread + */ + up(&nal_data->rxtwe_wait); + + while(nal_data->rxthread_flag != GMNAL_THREAD_RESET && delay--) { + CDEBUG(D_INFO, "gmnal_stop_rxthread sleeping\n"); + gmnal_yield(1); + up(&nal_data->rxtwe_wait); + } + + if (nal_data->rxthread_flag != GMNAL_THREAD_RESET) { + CDEBUG(D_ERROR, "I don't know how to wake the thread\n"); + } else { + CDEBUG(D_INFO, "rx thread seems to have stopped\n"); + } +} + +void +gmnal_stop_ctthread(gmnal_data_t *nal_data) +{ + int delay = 15; + + + + CDEBUG(D_TRACE, "Attempting to stop ctthread nal_data [%p]\n", + nal_data); + + nal_data->ctthread_flag = GMNAL_THREAD_STOP; + GMNAL_GM_LOCK(nal_data); + gm_set_alarm(nal_data->gm_port, &nal_data->ctthread_alarm, 10, + NULL, NULL); + GMNAL_GM_UNLOCK(nal_data); + + while(nal_data->ctthread_flag == GMNAL_THREAD_STOP && delay--) { + CDEBUG(D_INFO, "gmnal_stop_ctthread sleeping\n"); + gmnal_yield(1); + } + + if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) { + CDEBUG(D_ERROR, "I DON'T KNOW HOW TO WAKE THE THREAD\n"); + } else { + CDEBUG(D_INFO, "CT THREAD SEEMS TO HAVE STOPPED\n"); + } +} + + + +char * 
+gmnal_gm_error(gm_status_t status) +{ + switch(status) { + case(GM_SUCCESS): + return("SUCCESS"); + case(GM_FAILURE): + return("FAILURE"); + case(GM_INPUT_BUFFER_TOO_SMALL): + return("INPUT_BUFFER_TOO_SMALL"); + case(GM_OUTPUT_BUFFER_TOO_SMALL): + return("OUTPUT_BUFFER_TOO_SMALL"); + case(GM_TRY_AGAIN ): + return("TRY_AGAIN"); + case(GM_BUSY): + return("BUSY"); + case(GM_MEMORY_FAULT): + return("MEMORY_FAULT"); + case(GM_INTERRUPTED): + return("INTERRUPTED"); + case(GM_INVALID_PARAMETER): + return("INVALID_PARAMETER"); + case(GM_OUT_OF_MEMORY): + return("OUT_OF_MEMORY"); + case(GM_INVALID_COMMAND): + return("INVALID_COMMAND"); + case(GM_PERMISSION_DENIED): + return("PERMISSION_DENIED"); + case(GM_INTERNAL_ERROR): + return("INTERNAL_ERROR"); + case(GM_UNATTACHED): + return("UNATTACHED"); + case(GM_UNSUPPORTED_DEVICE): + return("UNSUPPORTED_DEVICE"); + case(GM_SEND_TIMED_OUT): + return("GM_SEND_TIMEDOUT"); + case(GM_SEND_REJECTED): + return("GM_SEND_REJECTED"); + case(GM_SEND_TARGET_PORT_CLOSED): + return("GM_SEND_TARGET_PORT_CLOSED"); + case(GM_SEND_TARGET_NODE_UNREACHABLE): + return("GM_SEND_TARGET_NODE_UNREACHABLE"); + case(GM_SEND_DROPPED): + return("GM_SEND_DROPPED"); + case(GM_SEND_PORT_CLOSED): + return("GM_SEND_PORT_CLOSED"); + case(GM_NODE_ID_NOT_YET_SET): + return("GM_NODE_ID_NOT_YET_SET"); + case(GM_STILL_SHUTTING_DOWN): + return("GM_STILL_SHUTTING_DOWN"); + case(GM_CLONE_BUSY): + return("GM_CLONE_BUSY"); + case(GM_NO_SUCH_DEVICE): + return("GM_NO_SUCH_DEVICE"); + case(GM_ABORTED): + return("GM_ABORTED"); + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); + case(GM_UNTRANSLATED_SYSTEM_ERROR): + return("GM_UNTRANSLATED_SYSTEM_ERROR"); + case(GM_ACCESS_DENIED): + return("GM_ACCESS_DENIED"); + + +/* + * These ones are in the docs but aren't in the header file + case(GM_DEV_NOT_FOUND): + return("GM_DEV_NOT_FOUND"); + case(GM_INVALID_PORT_NUMBER): + return("GM_INVALID_PORT_NUMBER"); + case(GM_UC_ERROR): + return("GM_US_ERROR"); + case(GM_PAGE_TABLE_FULL): + return("GM_PAGE_TABLE_FULL"); + case(GM_MINOR_OVERFLOW): + return("GM_MINOR_OVERFLOW"); + case(GM_SEND_ORPHANED): + return("GM_SEND_ORPHANED"); + case(GM_HARDWARE_FAULT): + return("GM_HARDWARE_FAULT"); + case(GM_DATA_CORRUPTED): + return("GM_DATA_CORRUPTED"); + case(GM_TIMED_OUT): + return("GM_TIMED_OUT"); + case(GM_USER_ERROR): + return("GM_USER_ERROR"); + case(GM_NO_MATCH): + return("GM_NOMATCH"); + case(GM_NOT_SUPPORTED_IN_KERNEL): + return("GM_NOT_SUPPORTED_IN_KERNEL"); + case(GM_NOT_SUPPORTED_ON_ARCH): + return("GM_NOT_SUPPORTED_ON_ARCH"); + case(GM_PTE_REF_CNT_OVERFLOW): + return("GM_PTR_REF_CNT_OVERFLOW"); + case(GM_NO_DRIVER_SUPPORT): + return("GM_NO_DRIVER_SUPPORT"); + case(GM_FIRMWARE_NOT_RUNNING): + return("GM_FIRMWARE_NOT_RUNNING"); + + * These ones are in the docs but aren't in the header file + */ + default: + return("UNKNOWN GM ERROR CODE"); + } +} + + +char * +gmnal_rxevent(gm_recv_event_t *ev) +{ + short event; + event = GM_RECV_EVENT_TYPE(ev); + switch(event) { + case(GM_NO_RECV_EVENT): + return("GM_NO_RECV_EVENT"); + case(GM_SENDS_FAILED_EVENT): + return("GM_SEND_FAILED_EVENT"); + case(GM_ALARM_EVENT): + return("GM_ALARM_EVENT"); + case(GM_SENT_EVENT): + return("GM_SENT_EVENT"); + case(_GM_SLEEP_EVENT): + return("_GM_SLEEP_EVENT"); + case(GM_RAW_RECV_EVENT): + return("GM_RAW_RECV_EVENT"); + case(GM_BAD_SEND_DETECTED_EVENT): + return("GM_BAD_SEND_DETECTED_EVENT"); + case(GM_SEND_TOKEN_VIOLATION_EVENT): + return("GM_SEND_TOKEN_VIOLATION_EVENT"); + 
case(GM_RECV_TOKEN_VIOLATION_EVENT): + return("GM_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_BAD_RECV_TOKEN_EVENT): + return("GM_BAD_RECV_TOKEN_EVENT"); + case(GM_ALARM_VIOLATION_EVENT): + return("GM_ALARM_VIOLATION_EVENT"); + case(GM_RECV_EVENT): + return("GM_RECV_EVENT"); + case(GM_HIGH_RECV_EVENT): + return("GM_HIGH_RECV_EVENT"); + case(GM_PEER_RECV_EVENT): + return("GM_PEER_RECV_EVENT"); + case(GM_HIGH_PEER_RECV_EVENT): + return("GM_HIGH_PEER_RECV_EVENT"); + case(GM_FAST_RECV_EVENT): + return("GM_FAST_RECV_EVENT"); + case(GM_FAST_HIGH_RECV_EVENT): + return("GM_FAST_HIGH_RECV_EVENT"); + case(GM_FAST_PEER_RECV_EVENT): + return("GM_FAST_PEER_RECV_EVENT"); + case(GM_FAST_HIGH_PEER_RECV_EVENT): + return("GM_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_REJECTED_SEND_EVENT): + return("GM_REJECTED_SEND_EVENT"); + case(GM_ORPHANED_SEND_EVENT): + return("GM_ORPHANED_SEND_EVENT"); + case(GM_BAD_RESEND_DETECTED_EVENT): + return("GM_BAD_RESEND_DETETED_EVENT"); + case(GM_DROPPED_SEND_EVENT): + return("GM_DROPPED_SEND_EVENT"); + case(GM_BAD_SEND_VMA_EVENT): + return("GM_BAD_SEND_VMA_EVENT"); + case(GM_BAD_RECV_VMA_EVENT): + return("GM_BAD_RECV_VMA_EVENT"); + case(_GM_FLUSHED_ALARM_EVENT): + return("GM_FLUSHED_ALARM_EVENT"); + case(GM_SENT_TOKENS_EVENT): + return("GM_SENT_TOKENS_EVENTS"); + case(GM_IGNORE_RECV_EVENT): + return("GM_IGNORE_RECV_EVENT"); + case(GM_ETHERNET_RECV_EVENT): + return("GM_ETHERNET_RECV_EVENT"); + case(GM_NEW_NO_RECV_EVENT): + return("GM_NEW_NO_RECV_EVENT"); + case(GM_NEW_SENDS_FAILED_EVENT): + return("GM_NEW_SENDS_FAILED_EVENT"); + case(GM_NEW_ALARM_EVENT): + return("GM_NEW_ALARM_EVENT"); + case(GM_NEW_SENT_EVENT): + return("GM_NEW_SENT_EVENT"); + case(_GM_NEW_SLEEP_EVENT): + return("GM_NEW_SLEEP_EVENT"); + case(GM_NEW_RAW_RECV_EVENT): + return("GM_NEW_RAW_RECV_EVENT"); + case(GM_NEW_BAD_SEND_DETECTED_EVENT): + return("GM_NEW_BAD_SEND_DETECTED_EVENT"); + case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): + return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): + return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_BAD_RECV_TOKEN_EVENT): + return("GM_NEW_BAD_RECV_TOKEN_EVENT"); + case(GM_NEW_ALARM_VIOLATION_EVENT): + return("GM_NEW_ALARM_VIOLATION_EVENT"); + case(GM_NEW_RECV_EVENT): + return("GM_NEW_RECV_EVENT"); + case(GM_NEW_HIGH_RECV_EVENT): + return("GM_NEW_HIGH_RECV_EVENT"); + case(GM_NEW_PEER_RECV_EVENT): + return("GM_NEW_PEER_RECV_EVENT"); + case(GM_NEW_HIGH_PEER_RECV_EVENT): + return("GM_NEW_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_FAST_RECV_EVENT): + return("GM_NEW_FAST_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_RECV_EVENT): + return("GM_NEW_FAST_HIGH_RECV_EVENT"); + case(GM_NEW_FAST_PEER_RECV_EVENT): + return("GM_NEW_FAST_PEER_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): + return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_REJECTED_SEND_EVENT): + return("GM_NEW_REJECTED_SEND_EVENT"); + case(GM_NEW_ORPHANED_SEND_EVENT): + return("GM_NEW_ORPHANED_SEND_EVENT"); + case(_GM_NEW_PUT_NOTIFICATION_EVENT): + return("_GM_NEW_PUT_NOTIFICATION_EVENT"); + case(GM_NEW_FREE_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_SEND_TOKEN_EVENT"); + case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); + case(GM_NEW_BAD_RESEND_DETECTED_EVENT): + return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); + case(GM_NEW_DROPPED_SEND_EVENT): + return("GM_NEW_DROPPED_SEND_EVENT"); + case(GM_NEW_BAD_SEND_VMA_EVENT): + return("GM_NEW_BAD_SEND_VMA_EVENT"); + case(GM_NEW_BAD_RECV_VMA_EVENT): + return("GM_NEW_BAD_RECV_VMA_EVENT"); + 
case(_GM_NEW_FLUSHED_ALARM_EVENT): + return("GM_NEW_FLUSHED_ALARM_EVENT"); + case(GM_NEW_SENT_TOKENS_EVENT): + return("GM_NEW_SENT_TOKENS_EVENT"); + case(GM_NEW_IGNORE_RECV_EVENT): + return("GM_NEW_IGNORE_RECV_EVENT"); + case(GM_NEW_ETHERNET_RECV_EVENT): + return("GM_NEW_ETHERNET_RECV_EVENT"); + default: + return("Unknown Recv event"); +#if 0 + case(/* _GM_PUT_NOTIFICATION_EVENT */ + case(/* GM_FREE_SEND_TOKEN_EVENT */ + case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */ +#endif + } +} + + +void +gmnal_yield(int delay) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); +} + +int +gmnal_is_small_msg(gmnal_data_t *nal_data, int niov, struct iovec *iov, + int len) +{ + + CDEBUG(D_TRACE, "len [%d] limit[%d]\n", len, + GMNAL_SMALL_MSG_SIZE(nal_data)); + + if ((len + sizeof(ptl_hdr_t) + sizeof(gmnal_msghdr_t)) + < GMNAL_SMALL_MSG_SIZE(nal_data)) { + + CDEBUG(D_INFO, "Yep, small message\n"); + return(1); + } else { + CDEBUG(D_ERROR, "No, not small message\n"); + /* + * could be made up of lots of little ones ! + */ + return(0); + } + +} + +int +gmnal_add_rxtwe(gmnal_data_t *nal_data, gm_recv_event_t *rxevent) +{ + gmnal_rxtwe_t *we = NULL; + + CDEBUG(D_NET, "adding entry to list\n"); + + PORTAL_ALLOC(we, sizeof(gmnal_rxtwe_t)); + if (!we) { + CDEBUG(D_ERROR, "failed to malloc\n"); + return(GMNAL_STATUS_FAIL); + } + we->rx = rxevent; + + spin_lock(&nal_data->rxtwe_lock); + if (nal_data->rxtwe_tail) { + nal_data->rxtwe_tail->next = we; + } else { + nal_data->rxtwe_head = we; + nal_data->rxtwe_tail = we; + } + nal_data->rxtwe_tail = we; + spin_unlock(&nal_data->rxtwe_lock); + + up(&nal_data->rxtwe_wait); + return(GMNAL_STATUS_OK); +} + +void +gmnal_remove_rxtwe(gmnal_data_t *nal_data) +{ + gmnal_rxtwe_t *_we, *we = nal_data->rxtwe_head; + + CDEBUG(D_NET, "removing all work list entries\n"); + + spin_lock(&nal_data->rxtwe_lock); + CDEBUG(D_NET, "Got lock\n"); + while (we) { + _we = we; + we = we->next; + PORTAL_FREE(_we, sizeof(gmnal_rxtwe_t)); + } + spin_unlock(&nal_data->rxtwe_lock); + nal_data->rxtwe_head = NULL; + nal_data->rxtwe_tail = NULL; +} + +gmnal_rxtwe_t * +gmnal_get_rxtwe(gmnal_data_t *nal_data) +{ + gmnal_rxtwe_t *we = NULL; + + CDEBUG(D_NET, "Getting entry to list\n"); + + do { + down(&nal_data->rxtwe_wait); + if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP) { + /* + * time to stop + * TO DO some one free the work entries + */ + return(NULL); + } + spin_lock(&nal_data->rxtwe_lock); + if (nal_data->rxtwe_head) { + CDEBUG(D_WARNING, "Got a work entry\n"); + we = nal_data->rxtwe_head; + nal_data->rxtwe_head = we->next; + if (!nal_data->rxtwe_head) + nal_data->rxtwe_tail = NULL; + } else { + CDEBUG(D_WARNING, "woken but no work\n"); + } + spin_unlock(&nal_data->rxtwe_lock); + } while (!we); + + CDEBUG(D_WARNING, "Returning we[%p]\n", we); + return(we); +} + + +/* + * Start the caretaker thread and a number of receiver threads + * The caretaker thread gets events from the gm library. + * It passes receive events to the receiver threads via a work list. + * It processes other events itself in gm_unknown. These will be + * callback events or sleeps. + */ +int +gmnal_start_kernel_threads(gmnal_data_t *nal_data) +{ + + int threads = 0; + /* + * the alarm is used to wake the caretaker thread from + * gm_unknown call (sleeping) to exit it. 
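+         * (gmnal_stop_ctthread() arms this alarm via gm_set_alarm() so the
+         * blocking receive returns and the caretaker notices that
+         * ctthread_flag has been set to GMNAL_THREAD_STOP.)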
+ */ + CDEBUG(D_NET, "Initializing caretaker thread alarm and flag\n"); + gm_initialize_alarm(&nal_data->ctthread_alarm); + nal_data->ctthread_flag = GMNAL_THREAD_RESET; + + + CDEBUG(D_INFO, "Starting caretaker thread\n"); + nal_data->ctthread_pid = + kernel_thread(gmnal_ct_thread, (void*)nal_data, 0); + if (nal_data->ctthread_pid <= 0) { + CDEBUG(D_ERROR, "Caretaker thread failed to start\n"); + return(GMNAL_STATUS_FAIL); + } + + while (nal_data->rxthread_flag != GMNAL_THREAD_RESET) { + gmnal_yield(1); + CDEBUG(D_INFO, "Waiting for caretaker thread signs of life\n"); + } + + CDEBUG(D_INFO, "caretaker thread has started\n"); + + + /* + * Now start a number of receiver threads + * these threads get work to do from the caretaker (ct) thread + */ + nal_data->rxthread_flag = GMNAL_THREAD_RESET; + nal_data->rxthread_stop_flag = GMNAL_THREAD_RESET; + + for (threads=0; threads<NRXTHREADS; threads++) + nal_data->rxthread_pid[threads] = -1; + spin_lock_init(&nal_data->rxtwe_lock); + spin_lock_init(&nal_data->rxthread_flag_lock); + sema_init(&nal_data->rxtwe_wait, 0); + nal_data->rxtwe_head = NULL; + nal_data->rxtwe_tail = NULL; + /* + * If the default number of receive threads isn't + * modified at load time, then start one thread per cpu + */ + if (num_rx_threads == -1) + num_rx_threads = smp_num_cpus; + CDEBUG(D_INFO, "Starting [%d] receive threads\n", num_rx_threads); + for (threads=0; threads<num_rx_threads; threads++) { + nal_data->rxthread_pid[threads] = + kernel_thread(gmnal_rx_thread, (void*)nal_data, 0); + if (nal_data->rxthread_pid[threads] <= 0) { + CDEBUG(D_ERROR, "Receive thread failed to start\n"); + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + return(GMNAL_STATUS_FAIL); + } + } + + for (;;) { + spin_lock(&nal_data->rxthread_flag_lock); + if (nal_data->rxthread_flag == GMNAL_RXTHREADS_STARTED) { + spin_unlock(&nal_data->rxthread_flag_lock); + break; + } + spin_unlock(&nal_data->rxthread_flag_lock); + gmnal_yield(1); + } + + CDEBUG(D_INFO, "receive threads seem to have started\n"); + + return(GMNAL_STATUS_OK); +} + +EXPORT_SYMBOL(gmnal_yield); +EXPORT_SYMBOL(gmnal_alloc_srxd); +EXPORT_SYMBOL(gmnal_get_srxd); +EXPORT_SYMBOL(gmnal_return_srxd); +EXPORT_SYMBOL(gmnal_free_srxd); +EXPORT_SYMBOL(gmnal_alloc_stxd); +EXPORT_SYMBOL(gmnal_get_stxd); +EXPORT_SYMBOL(gmnal_return_stxd); +EXPORT_SYMBOL(gmnal_free_stxd); +EXPORT_SYMBOL(gmnal_rxbuffer_to_srxd); +EXPORT_SYMBOL(gmnal_rxevent); +EXPORT_SYMBOL(gmnal_gm_error); +EXPORT_SYMBOL(gmnal_stop_ctthread); +EXPORT_SYMBOL(gmnal_add_rxtwe); +EXPORT_SYMBOL(gmnal_get_rxtwe); diff --git a/lnet/klnds/gmlnd/gmnal.c b/lnet/klnds/gmlnd/gmnal.c deleted file mode 100644 index 24708f7..0000000 --- a/lnet/klnds/gmlnd/gmnal.c +++ /dev/null @@ -1,284 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Based on ksocknal and qswnal - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details.
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "gmnal.h" - -ptl_handle_ni_t kgmnal_ni; -nal_t kgmnal_api; - -kgmnal_data_t kgmnal_data; -int gmnal_debug = 0; - -kpr_nal_interface_t kqswnal_router_interface = { - kprni_nalid: GMNAL, - kprni_arg: NULL, - kprni_fwd: kgmnal_fwd_packet, -}; - -static int kgmnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return PTL_OK; -} - -static void kgmnal_lock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - -static void kgmnal_unlock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - -static int kgmnal_shutdown(nal_t *nal, int ni) -{ - LASSERT (nal == &kgmnal_api); - return 0; -} - -static void kgmnal_yield( nal_t *nal ) -{ - LASSERT (nal == &kgmnal_api); - - if (current->need_resched) - schedule(); - return; -} - -kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *data,int ndx) -{ - kgmnal_rx_t *conn; - - PORTAL_ALLOC(conn, sizeof(kgmnal_rx_t)); - /* Check for out of mem here */ - if (conn==NULL) { - printk("LustreError: kgm_add_recv: memory alloc failed\n"); - return NULL; - } - - list_add(&conn->krx_item,(struct list_head *)&data->kgm_list); - // conn->ndx=ndx; - // conn->len=conn->ptlhdr_copied=0; - // conn->loopback=0; - return conn; -} - -static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - unsigned int nnids; - - gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - CDEBUG(D_NET, "calling lib_init with nid 0x%Lx of %d\n", - kgmnal_data.kgm_nid, nnids); - lib_init(&kgmnal_lib, kgmnal_data.kgm_nid, 0, nnids,ptl_size, ac_size); - return &kgmnal_api; -} - -static void /*__exit*/ -kgmnal_finalize(void) -{ - struct list_head *tmp; - - PORTAL_SYMBOL_UNREGISTER (kgmnal_ni); - PtlNIFini(kgmnal_ni); - lib_fini(&kgmnal_api); - - if (kgmnal_data.kgm_port) { - gm_close(kgmnal_data.kgm_port); - } - - /* FIXME: free dma buffers */ - /* FIXME: kill receiver thread */ - - PORTAL_FREE (kgmnal_data.kgm_trans, bsizeof(kgmnal_tx_t)*TXMSGS); - - list_for_each(tmp, &kgmnal_data.kgm_list) { - kgmnal_rx_t *conn; - conn = list_entry(tmp, kgmnal_rx_t, krx_item); - CDEBUG(D_IOCTL, "freeing conn %p\n",conn); - tmp = tmp->next; - list_del(&conn->krx_item); - PORTAL_FREE(conn, sizeof(*conn)); - } - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - -static int __init -kgmnal_initialize(void) -{ - int rc; - int ntok; - unsigned long sizemask; - unsigned int nid; - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kgmnal_api.forward = kgmnal_forward; - kgmnal_api.shutdown = kgmnal_shutdown; - kgmnal_api.yield = kgmnal_yield; - kgmnal_api.validate = NULL; /* our api validate is a NOOP */ - kgmnal_api.lock= kgmnal_lock; - kgmnal_api.unlock= 
kgmnal_unlock; - kgmnal_api.nal_data = &kgmnal_data; - - kgmnal_lib.nal_data = &kgmnal_data; - - memset(&kgmnal_data, 0, sizeof(kgmnal_data)); - - INIT_LIST_HEAD(&kgmnal_data.kgm_list); - kgmnal_data.kgm_cb = &kgmnal_lib; - - /* Allocate transmit descriptors */ - PORTAL_ALLOC (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS); - if (kgmnal_data.kgm_trans==NULL) { - printk("LustreError: kgmnal: init: failed to allocate transmit " - "descriptors\n"); - return -1; - } - memset(kgmnal_data.kgm_trans,-1,sizeof(kgmnal_tx_t)*(TXMSGS)); - - spin_lock_init(&kgmnal_data.kgm_dispatch_lock); - spin_lock_init(&kgmnal_data.kgm_update_lock); - spin_lock_init(&kgmnal_data.kgm_send_lock); - - /* Do the receiver and xmtr allocation */ - - rc = gm_init(); - if (rc != GM_SUCCESS) { - CERROR("gm_init failed: %d\n", rc); - return -1; - } - - rc = gm_open(&kgmnal_data.kgm_port, 0 , KGM_PORT_NUM, KGM_HOSTNAME, - GM_API_VERSION_1_1); - if (rc != GM_SUCCESS) { - gm_finalize(); - kgmnal_data.kgm_port = NULL; - CERROR("gm_open failed: %d\n", rc); - return -1; - } - gm_get_node_id(kgmnal_data.kgm_port, &nid); - kgmnal_data.kgm_nid = nid; - /* Allocate 2 different sizes of buffers. For new, use half - the tokens for each. */ - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d large %d byte recv buffers\n", - ntok, MSG_LEN_LARGE); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_LARGE); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_LARGE, MSG_SIZE_LARGE, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_LARGE, GM_LOW_PRIORITY); - } - - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d small %d byte recv buffers\n", - ntok, MSG_LEN_SMALL); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_SMALL); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_SMALL, MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - } - sizemask = (1 << MSG_SIZE_LARGE) | (1 << MSG_SIZE_SMALL); - CDEBUG(D_NET, "gm_set_acceptable_sizes port %p pri %d mask 0x%x\n", - kgmnal_data.kgm_port, GM_LOW_PRIORITY, sizemask); - gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_LOW_PRIORITY, - sizemask); - gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_HIGH_PRIORITY, 0); - - /* Initialize Network Interface */ - rc = PtlNIInit(kgmnal_init, 32, 4, 0, &kgmnal_ni); - if (rc) { - CERROR("PtlNIInit failed %d\n", rc); - return (-ENOMEM); - } - - /* Start receiver thread */ - kernel_thread(kgmnal_recv_thread, &kgmnal_data, 0); - - PORTAL_SYMBOL_REGISTER(kgmnal_ni); - - kgmnal_data.kgm_init = 1; - - return 0; -} - -MODULE_AUTHOR("Robert Read "); -MODULE_DESCRIPTION("Kernel Myrinet GM NAL v0.1"); -MODULE_LICENSE("GPL"); - -module_init (kgmnal_initialize); -module_exit (kgmnal_finalize); - -EXPORT_SYMBOL (kgmnal_ni); diff --git a/lnet/klnds/lgmlnd/Makefile.am b/lnet/klnds/lgmlnd/Makefile.am deleted file mode 100644 index 6794494..0000000 --- a/lnet/klnds/lgmlnd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. 
-# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = lgmnal -modulenet_DATA = lgmnal.o -EXTRA_PROGRAMS = lgmnal - -DEFS = -lgmnal_SOURCES = lgmnal.h lgmnal_api.c lgmnal_cb.c lgmnal_comm.c lgmnal_utils.c lgmnal_module.c diff --git a/lnet/klnds/lgmlnd/lgmnal.h b/lnet/klnds/lgmlnd/lgmnal.h deleted file mode 100644 index 8b496ec..0000000 --- a/lnet/klnds/lgmlnd/lgmnal.h +++ /dev/null @@ -1,463 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Portals GM kernel NAL header file - * This file makes all declaration and prototypes - * for the API side and CB side of the NAL - */ -#ifndef __INCLUDE_LGMNAL_H__ -#define __INCLUDE_LGMNAL_H__ - -#include "linux/config.h" -#include "linux/module.h" -#include "linux/tty.h" -#include "linux/kernel.h" -#include "linux/mm.h" -#include "linux/string.h" -#include "linux/stat.h" -#include "linux/errno.h" -#include "linux/locks.h" -#include "linux/unistd.h" -#include "linux/init.h" -#include "linux/sem.h" -#include "linux/vmalloc.h" -#ifdef MODVERSIONS -#include -#endif - - -#include "portals/nal.h" -#include "portals/api.h" -#include "portals/errno.h" -#include "linux/kp30.h" -#include "portals/p30.h" - -#include "portals/lib-nal.h" -#include "portals/lib-p30.h" - -#define GM_STRONG_TYPES 1 -#include "gm.h" -#include "gm_internal.h" - - -/* - * Defines for the API NAL - */ - - - -/* - * Small message size is configurable - * insmod can set small_msg_size - * which is used to populate nal_data.small_msg_size - */ -#define LGMNAL_SMALL_MESSAGE 1078 -#define LGMNAL_LARGE_MESSAGE_INIT 1079 -#define LGMNAL_LARGE_MESSAGE_ACK 1080 -#define LGMNAL_LARGE_MESSAGE_FINI 1081 - -extern int lgmnal_small_msg_size; -#define LGMNAL_SMALL_MSG_SIZE(a) a->small_msg_size -#define LGMNAL_IS_SMALL_MESSAGE(n,a,b,c) lgmnal_is_small_message(n, a, b, c) -#define LGMNAL_MAGIC 0x1234abcd - -typedef struct _lgmnal_hash { - void *key; - void *data; - struct _lgmnal_hash *next; - } lgmnal_hash_t; - -/* - * Small Transmit Descriptor - * A structre to keep track of a small transmit operation - * This structure has a one-to-one relationship with a small - * transmit buffer (both create by lgmnal_stxd_alloc). - * stxd has pointer to txbuffer and the hash table in nal_data - * allows us to go the other way. 
- */ -typedef struct _lgmnal_stxd_t { - void *buffer; /* Address of small wired buffer this decriptor uses */ - int size; /* size (in bytes) of the tx buffer this descripto uses */ - gm_size_t gmsize; /* gmsize of the tx buffer this descripto uses */ - int type; /* large or small message */ - struct _lgmnal_data_t *nal_data; - lib_msg_t *cookie; /* the cookie the portals library gave us */ - int niov; - struct iovec iov[PTL_MD_MAX_IOV]; - struct _lgmnal_stxd_t *next; -} lgmnal_stxd_t; - -/* - * as for lgmnal_stxd_t - */ -typedef struct _lgmnal_srxd_t { - void *buffer; - int size; - gm_size_t gmsize; - int type; - struct _lgmnal_srxd_t *next; -} lgmnal_srxd_t; - -/* - * Header which lmgnal puts at the start of each message - */ -typedef struct _lgmnal_msghdr { - int magic; - int type; - unsigned int sender_node_id; - lgmnal_stxd_t *stxd; - } lgmnal_msghdr_t; -#define LGMNAL_MSGHDR_SIZE sizeof(lgmnal_msghdr_t) - -/* - * There's one of these for each interface that is initialised - * There's a maximum of LGMNAL_NUM_IF lgmnal_data_t - */ - -typedef struct _lgmnal_data_t { - int refcnt; -#ifdef LGMNAL_API_LOCK_SPIN - spinlock_t api_lock; /* lock provided for api->lock function */ -#else - struct semaphore api_lock; -#endif - spinlock_t cb_lock; /* lock provided for cb_cli function */ - char _cb_file[128]; - char _cb_function[128]; - int _cb_line; - spinlock_t stxd_lock; /* lock to add or remove stxd to/from free list */ - struct semaphore stxd_token; /* Don't try to access the list until get a token */ - lgmnal_stxd_t *stxd; /* list of free stxd's */ -#ifdef LGMNAL_USE_GM_HASH - struct gm_hash *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ -#else - lgmnal_hash_t *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ -#endif - spinlock_t srxd_lock; - struct semaphore srxd_token; - lgmnal_srxd_t *srxd; -#ifdef LGMNAL_USE_GM_HASH - struct gm_hash *srxd_hash; -#else - lgmnal_hash_t *srxd_hash; -#endif - nal_t *nal; /* our API NAL */ - nal_cb_t *nal_cb; /* our CB nal */ - struct gm_port *gm_port; /* the gm port structure we open in lgmnal_init */ - unsigned int gm_local_nid; /* our gm local node id */ - unsigned int gm_global_nid; /* our gm global node id */ - spinlock_t gm_lock; /* GM is not threadsage */ - long rxthread_pid; /* thread id of our receiver thread */ - int rxthread_flag; /* stop the thread flag */ - gm_alarm_t rxthread_alarm; /* used to wake sleeping rx thread */ - int small_msg_size; - int small_msg_gmsize; - char _file[128]; - char _function[128]; - int _line; -} lgmnal_data_t; - -/* - * For nal_data->rxthread_flag - */ -#define LGMNAL_THREAD_START 444 -#define LGMNAL_THREAD_STARTED 333 -#define LGMNAL_THREAD_CONTINUE 777 -#define LGMNAL_THREAD_STOP 666 -#define LGMNAL_THREAD_STOPPED 555 - -#define LGMNAL_NUM_IF 1 - -#if 0 -/* - * A global structre to maintain 1 nal_data structure for each - * myrinet card that the user initialises (only tested for 1) - * To add or remove any nal_data structures from the ifs arrary the - * init_lock must be acquired. 
This is the only time this lock is acquired - */ -typedef struct _lgmnal_global_t { - int debug_level; - struct semaphore init_lock; - lgmnal_data_t *ifs[LGMNAL_NUM_IF]; -} lgmnal_global_t; - -extern lgmnal_data_t global_nal_data; -#define LGMNAL_DEBUG_LEVEL lgmnal_global.debug_level -#else -extern lgmnal_data_t *global_nal_data; -extern int lgmnal_debug_level; -#define LGMNAL_DEBUG_LEVEL lgmnal_debug_level -#endif - -/* - * The gm_port to use for lgmnal - */ -#define LGMNAL_GM_PORT 4 - -/* - * for ioctl get pid - */ -#define LGMNAL_IOC_GET_GNID 1 - -/* - * LGMNAL_DEBUG_LEVEL set by module load 0= level) lgmnal_print args -#else -#define LGMNAL_PRINT(level, args) -#endif - -#define LGMNAL_DEBUG_ERR 1 /* only report errors */ -#define LGMNAL_DEBUG_TRACE 2 /* on entering function */ -#define LGMNAL_DEBUG_V 3 /* debug */ -#define LGMNAL_DEBUG_VV 4 /* more debug */ - -/* - * Return codes - */ -#define LGMNAL_STATUS_OK 0 -#define LGMNAL_STATUS_FAIL 1 -#define LGMNAL_STATUS_NOMEM 2 - - -/* - * FUNCTION PROTOTYPES - */ - -/* - * Locking macros - */ - -/* - * To access the global structure - * to add or remove interface (lgmnal_init) or shutdown only - */ -#define LGMNAL_GLOBAL_LOCK_INIT sema_init(&(lgmnal_global.init_lock), 1) -#define LGMNAL_GLOBAL_LOCK do { \ - LGMNAL_PRINT(1, ("Acquiring global mutex\n")); \ - down(&(lgmnal_global.init_lock)); \ - LGMNAL_PRINT(1, ("Got global lock\n")); \ - } while (0) -#define LGMNAL_GLOBAL_UNLOCK do { \ - LGMNAL_PRINT(1, ("Releasing global mutex\n")); \ - up(&(lgmnal_global.init_lock)); \ - LGMNAL_PRINT(1, ("Release global mutex\n")); \ - } while (0) - -/* - * For the API lock function - */ -#ifdef LGMNAL_API_LOCK_SPIN -#define LGMNAL_API_LOCK_INIT(a) spin_lock_init(&a->api_lock) -#define LGMNAL_API_LOCK(a) spin_lock(&a->api_lock) -#define LGMNAL_API_UNLOCK(a) spin_unlock(&a->api_lock) -#else -#define LGMNAL_API_LOCK_INIT(a) sema_init(&a->api_lock, 1) -#define LGMNAL_API_LOCK(a) down(&a->api_lock) -#define LGMNAL_API_UNLOCK(a) up(&a->api_lock) -#endif - -/* - * For the Small tx and rx descriptor lists - */ -#define LGMNAL_TXD_LOCK_INIT(a) spin_lock_init(&a->stxd_lock); -#define LGMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); -#define LGMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); -#define LGMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); -#define LGMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); -#define LGMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) -#define LGMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); - - -#define LGMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); -#define LGMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); -#define LGMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); -#define LGMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); -#define LGMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); -#define LGMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) -#define LGMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); - -#define LGMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); -#define LGMNAL_GM_LOCK(a) do { \ - while (!spin_trylock(&a->gm_lock)) { \ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("waiting %s:%s:%d holder %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__, nal_data->_function, nal_data->_file, nal_data->_line)); \ - lgmnal_yield(128); \ - } \ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Locked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ - sprintf(nal_data->_function, "%s", __FUNCTION__); \ - sprintf(nal_data->_file, "%s", __FILE__); \ - nal_data->_line = __LINE__; \ - } while (0) -#define LGMNAL_GM_UNLOCK(a) do { \ - spin_unlock(&a->gm_lock); \ - 
memset(nal_data->_function, 0, 128); \ - memset(nal_data->_file, 0, 128); \ - nal_data->_line = 0; \ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Unlocked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ - } while(0); - -#define LGMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); - - -/* - * API NAL - */ -int lgmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); - -int lgmnal_api_shutdown(nal_t *, int); - -int lgmnal_api_validate(nal_t *, void *, size_t); - -void lgmnal_api_yield(nal_t *); - -void lgmnal_api_lock(nal_t *, unsigned long *); - -void lgmnal_api_unlock(nal_t *, unsigned long *); - - -#define LGMNAL_INIT_NAL(a) do { \ - a->forward = lgmnal_api_forward; \ - a->shutdown = lgmnal_api_shutdown; \ - a->validate = NULL; \ - a->yield = lgmnal_api_yield; \ - a->lock = lgmnal_api_lock; \ - a->unlock = lgmnal_api_unlock; \ - a->timeout = NULL; \ - a->refct = 1; \ - a->nal_data = NULL; \ - } while (0) - - -/* - * CB NAL - */ - -int lgmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); - -int lgmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); - -int lgmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, - unsigned int, struct iovec *, size_t, size_t); - -int lgmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, - unsigned int, ptl_kiov_t *, size_t, size_t); - -int lgmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); - -int lgmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); - -int lgmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); - -void *lgmnal_cb_malloc(nal_cb_t *, size_t); - -void lgmnal_cb_free(nal_cb_t *, void *, size_t); - -void lgmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); - -int lgmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); - -void lgmnal_cb_printf(nal_cb_t *, const char *fmt, ...); - -void lgmnal_cb_cli(nal_cb_t *, unsigned long *); - -void lgmnal_cb_sti(nal_cb_t *, unsigned long *); - -int lgmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); - -nal_t *lgmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); - -void lgmnal_fini(void); - - - -#define LGMNAL_INIT_NAL_CB(a) do { \ - a->cb_send = lgmnal_cb_send; \ - a->cb_send_pages = lgmnal_cb_send_pages; \ - a->cb_recv = lgmnal_cb_recv; \ - a->cb_recv_pages = lgmnal_cb_recv_pages; \ - a->cb_read = lgmnal_cb_read; \ - a->cb_write = lgmnal_cb_write; \ - a->cb_callback = lgmnal_cb_callback; \ - a->cb_malloc = lgmnal_cb_malloc; \ - a->cb_free = lgmnal_cb_free; \ - a->cb_map = NULL; \ - a->cb_unmap = NULL; \ - a->cb_printf = lgmnal_cb_printf; \ - a->cb_cli = lgmnal_cb_cli; \ - a->cb_sti = lgmnal_cb_sti; \ - a->cb_dist = lgmnal_cb_dist; \ - a->nal_data = NULL; \ - } while (0) - -/* - * lgmnal utilities - */ - -void lgmnal_print(const char *, ...); - -/* - * Small Transmit and Receive Descriptor Functions - */ -int lgmnal_alloc_stxd(lgmnal_data_t *); -void lgmnal_free_stxd(lgmnal_data_t *); -lgmnal_stxd_t* lgmnal_get_stxd(lgmnal_data_t *, int); -void lgmnal_return_stxd(lgmnal_data_t *, lgmnal_stxd_t *); - -int lgmnal_alloc_srxd(lgmnal_data_t *); -void lgmnal_free_srxd(lgmnal_data_t *); -lgmnal_srxd_t* lgmnal_get_srxd(lgmnal_data_t *, int); -void lgmnal_return_srxd(lgmnal_data_t *, lgmnal_srxd_t *); - -/* - * general utility functions - */ -lgmnal_srxd_t *lgmnal_rxbuffer_to_srxd(lgmnal_data_t *, void*); -lgmnal_stxd_t *lgmnal_txbuffer_to_stxd(lgmnal_data_t *, void*); -void 
lgmnal_stop_rxthread(lgmnal_data_t *); -void lgmnal_small_tx_done(gm_port_t *, void *, gm_status_t); -char *lgmnal_gm_error(gm_status_t); -char *lgmnal_rxevent(gm_recv_event_t*); -int lgmnal_is_small_message(lgmnal_data_t*, int, struct iovec*, int); - -void *lgmnal_hash_find(lgmnal_hash_t *, void*); -int lgmnal_hash_add(lgmnal_hash_t**, void*, void*); -void lgmnal_hash_free(lgmnal_hash_t**); - -/* - * Communication functions - */ -int lgmnal_receive_thread(void *); -int -lgmnal_small_transmit(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec*, int); - -int -lgmnal_small_receive2(nal_cb_t *, void *, lib_msg_t *, unsigned int, struct iovec *, size_t, size_t); - -void lgmnal_yield(int); - -#endif /*__INCLUDE_LGMNAL_H__*/ diff --git a/lnet/klnds/lgmlnd/lgmnal_api.c b/lnet/klnds/lgmlnd/lgmnal_api.c deleted file mode 100644 index 8322e83..0000000 --- a/lnet/klnds/lgmlnd/lgmnal_api.c +++ /dev/null @@ -1,527 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Implements the API NAL functions - */ - -#include "lgmnal.h" - -lgmnal_data_t *global_nal_data = NULL; -/* - * lgmnal_api_forward - * This function takes a pack block of arguments from the NAL API - * module and passes them to the NAL CB module. The CB module unpacks - * the args and calls the appropriate function indicated by index. - * Typically this function is used to pass args between kernel and use - * space. 
- * As lgmanl exists entirely in kernel, just pass the arg block directly to - * the NAL CB, buy passing the args to lib_dispatch - * Arguments are - * nal_t nal Our nal - * int index the api function that initiated this call - * void *args packed block of function args - * size_t arg_len length of args block - * void *ret A return value for the API NAL - * size_t ret_len Size of the return value - * - */ - -int -lgmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, - void *ret, size_t ret_len) -{ - - nal_cb_t *nal_cb = NULL; - lgmnal_data_t *nal_data = NULL; - - - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_forward: nal [%p], index [%d], args [%p], arglen [%d], ret [%p], retlen [%d]\n", nal, index, args, arg_len, ret, ret_len)); - - if (!nal || !args || (index < 0) || (arg_len < 0)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); -#ifdef LGMNAL_DEBUG - if (!nal) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No nal specified\n")); - if (!args) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No args specified\n")); - if (index < 0) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Index is negative[%d]\n", index)); - if (arg_len < 0) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("arg_len is negative [%d]\n", arg_len)); -#endif - return (PTL_FAIL); - } - - if (ret && (ret_len <= 0)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); -#ifdef LGMNAL_DEBUG - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("ret_len is [%d]\n", ret_len)); -#endif - return (PTL_FAIL); - } - - - if (!nal->nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal, no nal data\n")); - return (PTL_FAIL); - } - - nal_data = nal->nal_data; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_data is [%p]\n", nal_data)); - - if (!nal_data->nal_cb) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal_data, no nal_cb\n")); - return (PTL_FAIL); - } - - nal_cb = nal_data->nal_cb; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_cb is [%p]\n", nal_cb)); - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward calling lib_dispatch\n")); - lib_dispatch(nal_cb, NULL, index, args, ret); - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward returns from lib_dispatch\n")); - - return(PTL_OK); -} - - -/* - * lgmnal_api_shutdown - * Close down this interface and free any resources associated with it - * nal_t nal our nal to shutdown - */ -int -lgmnal_api_shutdown(nal_t *nal, int interface) -{ - - lgmnal_data_t *nal_data = nal->nal_data; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_shutdown: nal_data [%p]\n", nal_data)); - - /* - * TO DO lgmnal_api_shutdown what is to be done? 
- */ - - return(PTL_OK); -} - - -/* - * lgmnal_api_validate - * validate a user address for use in communications - * There's nothing to be done here - */ -int -lgmnal_api_validate(nal_t *nal, void *base, size_t extent) -{ - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_validate : nal [%p], base [%p], extent [%d]\n", nal, base, extent)); - - return(PTL_OK); -} - - - -/* - * lgmnal_api_yield - * Give up the processor - */ -void -lgmnal_api_yield(nal_t *nal) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_yield : nal [%p]\n", nal)); - - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - - return; -} - - - -/* - * lgmnal_api_lock - * Take a threadsafe lock - */ -void -lgmnal_api_lock(nal_t *nal, unsigned long *flags) -{ - - lgmnal_data_t *nal_data; - nal_cb_t *nal_cb; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flagsa [%p] flags[%ul]\n", nal, flags, *flags)); - - nal_data = nal->nal_data; - nal_cb = nal_data->nal_cb; - - nal_cb->cb_cli(nal_cb, flags); -/* - LGMNAL_API_LOCK(nal_data); -*/ - - return; -} - -/* - * lgmnal_api_unlock - * Release a threadsafe lock - */ -void -lgmnal_api_unlock(nal_t *nal, unsigned long *flags) -{ - lgmnal_data_t *nal_data; - nal_cb_t *nal_cb; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flags [%p]\n", nal, flags)); - - nal_data = nal->nal_data; - if (!nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal, no nal_data\n")); - } - nal_cb = nal_data->nal_cb; - if (!nal_cb) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal_data, no nal_cb\n")); - } - - nal_cb->cb_sti(nal_cb, flags); -/* - LGMNAL_API_UNLOCK(nal_data); -*/ - - return; -} - - -nal_t * -lgmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t rpid) -{ - - nal_t *nal = NULL; - nal_cb_t *nal_cb = NULL; - lgmnal_data_t *nal_data = NULL; - lgmnal_srxd_t *srxd = NULL; - gm_status_t gm_status; - unsigned int local_nid = 0, global_nid = 0; - ptl_nid_t portals_nid; - ptl_pid_t portals_pid = 0; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_init : interface [%d], ptl_size [%d], ac_size[%d]\n", - interface, ptl_size, ac_size)); - - if ((interface < 0) || (interface > LGMNAL_NUM_IF) || (ptl_size <= 0) || (ac_size <= 0) ) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad args\n")); - return(NULL); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("parameters check out ok\n")); - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Acquired global lock\n")); - - - PORTAL_ALLOC(nal_data, sizeof(lgmnal_data_t)); - if (!nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't get memory\n")); - return(NULL); - } - memset(nal_data, 0, sizeof(lgmnal_data_t)); - /* - * set the small message buffer size - */ - nal_data->refcnt = 1; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_data[%p]\n", nal_data)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("small_msg_size is [%d]\n", nal_data->small_msg_size)); - - PORTAL_ALLOC(nal, sizeof(nal_t)); - if (!nal) { - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - return(NULL); - } - memset(nal, 0, sizeof(nal_t)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal[%p]\n", nal)); - - PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); - if (!nal_cb) { - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - return(NULL); - } - memset(nal_cb, 0, sizeof(nal_cb_t)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_cb[%p]\n", nal_cb)); - - LGMNAL_INIT_NAL(nal); - LGMNAL_INIT_NAL_CB(nal_cb); - /* - * String them all together - */ - nal->nal_data = (void*)nal_data; - nal_cb->nal_data = 
(void*)nal_data; - nal_data->nal = nal; - nal_data->nal_cb = nal_cb; - - LGMNAL_API_LOCK_INIT(nal_data); - LGMNAL_CB_LOCK_INIT(nal_data); - LGMNAL_GM_LOCK_INIT(nal_data); - - - /* - * initialise the interface, - */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling gm_init\n")); - if (gm_init() != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("call to gm_init failed\n")); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_open with interface [%d], port [%d], name [%s], version [%d]\n", interface, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION)); - - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_open(&nal_data->gm_port, 0, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION); - LGMNAL_GM_UNLOCK(nal_data); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open returned [%d]\n", gm_status)); - if (gm_status == GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open succeeded port[%p]\n", nal_data->gm_port)); - } else { - switch(gm_status) { - case(GM_INVALID_PARAMETER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Invalid Parameter\n")); - break; - case(GM_BUSY): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. GM Busy\n")); - break; - case(GM_NO_SUCH_DEVICE): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. No such device\n")); - break; - case(GM_INCOMPATIBLE_LIB_AND_DRIVER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Incompatile lib and driver\n")); - break; - case(GM_OUT_OF_MEMORY): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Out of Memory\n")); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Unknow error code [%d]\n", gm_status)); - break; - } - LGMNAL_GM_LOCK(nal_data); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - - nal_data->small_msg_size = lgmnal_small_msg_size; - nal_data->small_msg_gmsize = gm_min_size_for_length(lgmnal_small_msg_size); - - if (lgmnal_alloc_srxd(nal_data) != LGMNAL_STATUS_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small rx descriptors\n")); - lgmnal_free_stxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - - /* - * Hang out a bunch of small receive buffers - * In fact hang them all out - */ - while((srxd = lgmnal_get_srxd(nal_data, 0))) { - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("giving [%p] to gm_provide_recvive_buffer\n", srxd->buffer)); - LGMNAL_GM_LOCK(nal_data); - gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, - srxd->gmsize, GM_LOW_PRIORITY, 0); - LGMNAL_GM_UNLOCK(nal_data); - } - - /* - * Allocate pools of small tx buffers and descriptors - */ - if (lgmnal_alloc_stxd(nal_data) != LGMNAL_STATUS_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small tx descriptors\n")); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - /* - * Start the recieve thread - * Initialise the gm_alarm we will use to wake the thread is - * it needs to be stopped - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Initializing receive thread alarm 
and flag\n")); - gm_initialize_alarm(&nal_data->rxthread_alarm); - nal_data->rxthread_flag = LGMNAL_THREAD_START; - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Starting receive thread\n")); - nal_data->rxthread_pid = kernel_thread(lgmnal_receive_thread, (void*)nal_data, 0); - if (nal_data->rxthread_pid <= 0) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Receive thread failed to start\n")); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - while (nal_data->rxthread_flag != LGMNAL_THREAD_STARTED) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1024); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Waiting for receive thread signs of life\n")); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("receive thread seems to have started\n")); - nal_data->rxthread_flag = LGMNAL_THREAD_CONTINUE; - - - - /* - * Initialise the portals library - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Getting node id\n")); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - lgmnal_stop_rxthread(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't determine node id\n")); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - nal_data->gm_local_nid = local_nid; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%u]\n", local_nid)); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, &global_nid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to obtain global id\n")); - lgmnal_stop_rxthread(nal_data); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node id is [%u][%x]\n", global_nid)); - nal_data->gm_global_nid = global_nid; - -/* - pid = gm_getpid(); -*/ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_pid is [%u]\n", portals_pid)); - portals_nid = (unsigned long)global_nid; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_nid is [%lu]\n", portals_nid)); - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_init\n")); - if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, ac_size) != PTL_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lib_init failed\n")); - lgmnal_stop_rxthread(nal_data); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_init finished\n")); - global_nal_data = nal->nal_data; - return(nal); -} - - - -/* - * Called when module removed - */ -void lgmnal_fini() -{ - lgmnal_data_t *nal_data = global_nal_data; - nal_t *nal = nal_data->nal; - nal_cb_t 
*nal_cb = nal_data->nal_cb; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_fini\n")); - - PtlNIFini(lgmnal_ni); - lib_fini(nal_cb); - - lgmnal_stop_rxthread(nal_data); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); -} - -EXPORT_SYMBOL(lgmnal_init); -EXPORT_SYMBOL(lgmnal_fini); -EXPORT_SYMBOL(lgmnal_api_forward); -EXPORT_SYMBOL(lgmnal_api_validate); -EXPORT_SYMBOL(lgmnal_api_yield); -EXPORT_SYMBOL(lgmnal_api_lock); -EXPORT_SYMBOL(lgmnal_api_unlock); -EXPORT_SYMBOL(lgmnal_api_shutdown); diff --git a/lnet/klnds/lgmlnd/lgmnal_cb.c b/lnet/klnds/lgmlnd/lgmnal_cb.c deleted file mode 100644 index dcd5446..0000000 --- a/lnet/klnds/lgmlnd/lgmnal_cb.c +++ /dev/null @@ -1,258 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -/* - * This file implements the nal cb functions - */ - - -#include "lgmnal.h" - -int lgmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -{ - lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; - int status = PTL_OK; - lgmnal_data_t *nal_data = nal_cb->nal_data; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv nal_cb [%p],private[%p], cookie[%p], niov[%d], iov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, niov, iov, mlen, rlen)); - - if (srxd->type == LGMNAL_SMALL_MESSAGE) { - if (!LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, mlen)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_cb_recv. 
This is not a small message\n")); - } - status = lgmnal_small_receive2(nal_cb, private, cookie, niov, iov, mlen, rlen); - } - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_return status [%d]\n", status)); - return(status); -} - -int lgmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, size_t rlen) -{ - lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; - int status = PTL_OK; - struct iovec *iovec = NULL; - int i = 0; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv_pages nal_cb [%p],private[%p], cookie[%p], kniov[%d], kiov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, kniov, kiov, mlen, rlen)); - - if (srxd->type == LGMNAL_SMALL_MESSAGE) { - PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov); - if (!iovec) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't malloc\n")); - return(LGMNAL_STATUS_FAIL); - } - - /* - * map each page and create an iovec for it - */ - for (i=0; ikiov_page, kiov->kiov_len, kiov->kiov_offset)); - iovec->iov_len = kiov->kiov_len; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kmap", kiov->kiov_page)); - iovec->iov_base = kmap(kiov->kiov_page) + kiov->kiov_offset; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling iov_base is [%p]", iovec->iov_base)); - iovec->iov_len = kiov->kiov_len; - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("calling lgmnal_small_receive2\n")); - status = lgmnal_small_receive2(nal_cb, private, cookie, kniov, iovec, mlen, rlen); - PORTAL_FREE(iovec, sizeof(struct iovec)*kniov); - } - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_return status [%d]\n", status)); - return(status); -} - - -int lgmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, - int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t len) -{ - - lgmnal_data_t *nal_data; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sendnid [%lu] niov[%d] len[%d]\n", nid, niov, len)); - nal_data = nal_cb->nal_data; - - if (LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n")); - lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send it is not supported yet\n")); -/* - lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); -*/ - return(LGMNAL_STATUS_FAIL); - } - return(PTL_OK); -} - -int lgmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, - int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len) -{ - - int i = 0; - lgmnal_data_t *nal_data; - struct iovec *iovec; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_send_pages nid [%lu] niov[%d] len[%d]\n", nid, kniov, len)); - nal_data = nal_cb->nal_data; - if (LGMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) { - /* TO DO fix small message for send pages */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n")); - PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec)); - - for (i=0; ikiov_page, kiov->kiov_len, kiov->kiov_offset)); - iovec->iov_len = kiov->kiov_len; - iovec->iov_base = kmap(kiov->kiov_page) + kiov->kiov_offset; - iovec->iov_len = kiov->kiov_len; - } - lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, kniov, iovec, len); - PORTAL_FREE(iovec, kniov*sizeof(struct iovec)); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send it is not supported yet\n")); -/* - lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, 
pid, niov, iov, len); -*/ - return(LGMNAL_STATUS_FAIL); - } - return(PTL_OK); -} - -int lgmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, user_ptr src, size_t len) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_read dst [%p] src [%p] len[%d]\n", dst, src, len)); - gm_bcopy(src, dst, len); - return(PTL_OK); -} - -int lgmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, void *src, size_t len) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_write :: dst [%p] src [%p] len[%d]\n", dst, src, len)); - gm_bcopy(src, dst, len); - return(PTL_OK); -} - -int lgmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_callback nal_cb[%p], private[%p], eq[%p], ev[%p]\n", nal_cb, private, eq, ev)); - - if (eq->event_callback != NULL) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("found callback\n")); - eq->event_callback(ev); - } - - return(PTL_OK); -} - -void *lgmnal_cb_malloc(nal_cb_t *nal_cb, size_t len) -{ - void *ptr = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_malloc len[%d]\n", len)); - PORTAL_ALLOC(ptr, len); - return(ptr); -} - -void lgmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_free :: buf[%p] len[%d]\n", buf, len)); - PORTAL_FREE(buf, len); - return; -} - -void lgmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void **addrkey) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_unmap niov[%d] iov[%], addrkey[%p]\n", niov, iov, addrkey)); - return; -} - -int lgmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void**addrkey) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_map niov[%d], iov[%p], addrkey[%p], niov, iov, addrkey\n")); - return(PTL_OK); -} - -void lgmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...) 
-{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_printf\n")); - lgmnal_print(fmt); - return; -} - -void lgmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) -{ - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - spinlock_t cb_lock = nal_data->cb_lock; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_cli\n")); -/* - local_irq_save(*flags); - spin_lock_irqsave(&cb_lock, *flags); -*/ - spin_lock(&cb_lock); - return; -} - -void lgmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) -{ - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - spinlock_t cb_lock = nal_data->cb_lock; - -/* - local_irq_restore(*flags); - spin_unlock_irqrestore(&cb_lock, *flags); -*/ - spin_unlock(&cb_lock); - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sti\n")); - return; -} - -int lgmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_dist\n")); - if (dist) - *dist = 27; - return(PTL_OK); -} - - - - -EXPORT_SYMBOL(lgmnal_cb_send); -EXPORT_SYMBOL(lgmnal_cb_send_pages); -EXPORT_SYMBOL(lgmnal_cb_recv); -EXPORT_SYMBOL(lgmnal_cb_recv_pages); -EXPORT_SYMBOL(lgmnal_cb_read); -EXPORT_SYMBOL(lgmnal_cb_write); -EXPORT_SYMBOL(lgmnal_cb_cli); -EXPORT_SYMBOL(lgmnal_cb_sti); -EXPORT_SYMBOL(lgmnal_cb_dist); -EXPORT_SYMBOL(lgmnal_cb_printf); -EXPORT_SYMBOL(lgmnal_cb_map); -EXPORT_SYMBOL(lgmnal_cb_unmap); -EXPORT_SYMBOL(lgmnal_cb_callback); -EXPORT_SYMBOL(lgmnal_cb_free); -EXPORT_SYMBOL(lgmnal_cb_malloc); diff --git a/lnet/klnds/lgmlnd/lgmnal_comm.c b/lnet/klnds/lgmlnd/lgmnal_comm.c deleted file mode 100644 index 4cd1b83..0000000 --- a/lnet/klnds/lgmlnd/lgmnal_comm.c +++ /dev/null @@ -1,477 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - - -/* - * This file contains all lgmnal send and receive functions - */ - -#include "lgmnal.h" - -int -lgmnal_requeue_rxbuffer(lgmnal_data_t *nal_data, lgmnal_srxd_t *srxd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_requeue_rxbuffer\n")); - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data)); - - LGMNAL_GM_LOCK(nal_data); - gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, - srxd->gmsize, GM_LOW_PRIORITY, 0 ); - LGMNAL_GM_UNLOCK(nal_data); - - return(LGMNAL_STATUS_OK); -} - - -/* - * Handle a bad message - * A bad message is one we don't expect or can't interpret - */ -int -lgmnal_badrx_message(lgmnal_data_t *nal_data, gm_recv_t *recv, lgmnal_srxd_t *srxd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Can't handle message\n")); - - if (!srxd) - srxd = lgmnal_rxbuffer_to_srxd(nal_data, gm_ntohp(recv->buffer)); - if (srxd) { - lgmnal_requeue_rxbuffer(nal_data, srxd); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't find a descriptor for this buffer\n")); - /* - * get rid of it ? - */ - return(LGMNAL_STATUS_FAIL); - } - - return(LGMNAL_STATUS_OK); -} - - -/* - * Start processing a small message receive - * Get here from lgmnal_receive_thread - * Hand off to lib_parse, which calls cb_recv - * which hands back to lgmnal_small_receive2 - * Deal with all endian stuff here (if we can!) - */ -int -lgmnal_small_receive1(lgmnal_data_t *nal_data, gm_recv_t *recv) -{ - lgmnal_srxd_t *srxd = NULL; - void *buffer = NULL; - unsigned int snode, sport, type, length; - lgmnal_msghdr_t *lgmnal_msghdr; - ptl_hdr_t *portals_hdr; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive1 nal_data [%p], recv [%p]\n", nal_data, recv)); - - buffer = gm_ntohp(recv->buffer);; - snode = (int)gm_ntoh_u16(recv->sender_node_id); - sport = (int)gm_ntoh_u8(recv->sender_port_id); - type = (int)gm_ntoh_u8(recv->type); - buffer = gm_ntohp(recv->buffer); - length = (int) gm_ntohl(recv->length); - - lgmnal_msghdr = (lgmnal_msghdr_t*)buffer; - portals_hdr = (ptl_hdr_t*)(buffer+LGMNAL_MSGHDR_SIZE); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("rx_event:: Sender node [%d], Sender Port [%d], type [%d], length [%d], buffer [%p]\n", - snode, sport, type, length, buffer)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_msghdr:: Sender node [%u], magic [%lx], type [%d]\n", - lgmnal_msghdr->sender_node_id, lgmnal_msghdr->magic, lgmnal_msghdr->type)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: Sender node [%ul], dest_node [%ul]\n", - portals_hdr->src_nid, portals_hdr->dest_nid)); - - - /* - * Get a transmit descriptor for this message - */ - srxd = lgmnal_rxbuffer_to_srxd(nal_data, buffer); - LGMNAL_PRINT(LGMNAL_DEBUG, ("Back from lgmnal_rxbuffer_to_srxd\n")); - if (!srxd) { - LGMNAL_PRINT(LGMNAL_DEBUG, ("Failed to get receive descriptor for this buffer\n")); - lib_parse(nal_data->nal_cb, portals_hdr, srxd); - return(LGMNAL_STATUS_FAIL); - } - srxd->type = LGMNAL_SMALL_MESSAGE; - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling lib_parse buffer is [%p]\n", buffer+LGMNAL_MSGHDR_SIZE)); - /* - * control passes to lib, which calls cb_recv - * cb_recv is responsible for returning the buffer - * for future receive - */ - lib_parse(nal_data->nal_cb, portals_hdr, srxd); - - return(LGMNAL_STATUS_OK); -} - -/* - * Get here from lgmnal_receive_thread, lgmnal_small_receive1 - * lib_parse, cb_recv - * Put data from prewired receive buffer into users buffer(s) - * Hang out the receive buffer again for another receive - * Call lib_finalize - */ -int -lgmnal_small_receive2(nal_cb_t *nal_cb, void *private, 
lib_msg_t *cookie, unsigned int niov, - struct iovec *iov, size_t mlen, size_t rlen) -{ - lgmnal_srxd_t *srxd = NULL; - void *buffer = NULL; - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive2 niov [%d] mlen[%d]\n", niov, mlen)); - - if (!private) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_small_receive2 no context\n")); - lib_finalize(nal_cb, private, cookie); - return(PTL_FAIL); - } - - srxd = (lgmnal_srxd_t*)private; - buffer = srxd->buffer; - buffer += sizeof(lgmnal_msghdr_t); - buffer += sizeof(ptl_hdr_t); - - while(niov--) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing [%p] len [%d]\n", iov, iov->iov_len)); - gm_bcopy(buffer, iov->iov_base, iov->iov_len); - buffer += iov->iov_len; - iov++; - } - - - /* - * let portals library know receive is complete - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_finalize\n")); - if (lib_finalize(nal_cb, private, cookie) != PTL_OK) { - /* TO DO what to do with failed lib_finalise? */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lib_finalize failed\n")); - } - /* - * return buffer so it can be used again - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling gm_provide_receive_buffer\n")); - LGMNAL_GM_LOCK(nal_data); - gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, srxd->gmsize, GM_LOW_PRIORITY, 0); - LGMNAL_GM_UNLOCK(nal_data); - - return(PTL_OK); -} - - - -/* - * The recevive thread - * This guy wait in gm_blocking_recvive and gets - * woken up when the myrinet adaptor gets an interrupt. - * Hands off processing of small messages and blocks again - */ -int -lgmnal_receive_thread(void *arg) -{ - lgmnal_data_t *nal_data; - gm_recv_event_t *rxevent = NULL; - gm_recv_t *recv = NULL; - void *buffer; - - if (!arg) { - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread. NO nal_data. 
Exiting\n", arg)); - return(-1); - } - - nal_data = (lgmnal_data_t*)arg; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread nal_data is [%p]\n", arg)); - - nal_data->rxthread_flag = LGMNAL_THREAD_STARTED; - while (nal_data->rxthread_flag == LGMNAL_THREAD_STARTED) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: lgmnal_receive_threads waiting for LGMNAL_CONTINUE flag\n")); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1024); - - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: calling daemonize\n")); - daemonize(); - LGMNAL_GM_LOCK(nal_data); - while(nal_data->rxthread_flag == LGMNAL_THREAD_CONTINUE) { - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Receive thread waiting\n")); - rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: receive thread got [%s]\n", lgmnal_rxevent(rxevent))); - if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Receive thread time to exit\n")); - break; - } - switch (GM_RECV_EVENT_TYPE(rxevent)) { - - case(GM_RECV_EVENT): - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: GM_RECV_EVENT\n")); - recv = (gm_recv_t*)&(rxevent->recv); - buffer = gm_ntohp(recv->buffer); - if (((lgmnal_msghdr_t*)buffer)->type == LGMNAL_SMALL_MESSAGE) { - LGMNAL_GM_UNLOCK(nal_data); - lgmnal_small_receive1(nal_data, recv); - LGMNAL_GM_LOCK(nal_data); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: Unsupported message type\n")); - lgmnal_badrx_message(nal_data, recv, NULL); - } - break; - case(_GM_SLEEP_EVENT): - /* - * Blocking receive above just returns - * immediatly with _GM_SLEEP_EVENT - * Don't know what this is - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Sleeping in gm_unknown\n")); - LGMNAL_GM_UNLOCK(nal_data); - gm_unknown(nal_data->gm_port, rxevent); - LGMNAL_GM_LOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Awake from gm_unknown\n")); - break; - - default: - /* - * Don't know what this is - * gm_unknown will make sense of it - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Passing event to gm_unknown\n")); - gm_unknown(nal_data->gm_port, rxevent); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Processed unknown event\n")); - - } - - - } - LGMNAL_GM_UNLOCK(nal_data); - nal_data->rxthread_flag = LGMNAL_THREAD_STOPPED; - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: The lgmnal_receive_thread nal_data [%p] is exiting\n", nal_data)); - return(LGMNAL_STATUS_OK); -} - - -int -lgmnal_small_transmit(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, - ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) -{ - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - lgmnal_stxd_t *stxd = NULL; - void *buffer = NULL; - lgmnal_msghdr_t *msghdr = NULL; - int tot_size = 0; - unsigned int local_nid; - gm_status_t gm_status = GM_SUCCESS; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_transmit nal_cb [%p] private [%p] cookie [%p] hdr [%p] type [%d] global_nid [%u][%x] pid [%d] niov [%d] iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, global_nid, global_nid, pid, niov, iov, size)); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: dest_nid [%lu], src_nid [%lu]\n", hdr->dest_nid, hdr->src_nid)); - - if (!nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_data\n")); - return(LGMNAL_STATUS_FAIL); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("nal_data [%p]\n", nal_data)); - } - - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_global_id_to_node_id(nal_data->gm_port, 
global_nid, &local_nid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to obtain local id\n")); - return(LGMNAL_STATUS_FAIL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local Node_id is [%u][%x]\n", local_nid, local_nid)); - - stxd = lgmnal_get_stxd(nal_data, 1); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); - - stxd->type = LGMNAL_SMALL_MESSAGE; - stxd->cookie = cookie; - - /* - * Copy lgmnal_msg_hdr and portals header to the transmit buffer - * Then copy the data in - */ - buffer = stxd->buffer; - msghdr = (lgmnal_msghdr_t*)buffer; - - msghdr->magic = LGMNAL_MAGIC; - msghdr->type = LGMNAL_SMALL_MESSAGE; - msghdr->sender_node_id = nal_data->gm_global_nid; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); - - buffer += sizeof(lgmnal_msghdr_t); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Advancing buffer pointer by [%x] to [%p]\n", sizeof(lgmnal_msghdr_t), buffer)); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); - gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); - - buffer += sizeof(ptl_hdr_t); - - while(niov--) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing iov [%p] len [%d] to [%p]\n", iov, iov->iov_len, buffer)); - gm_bcopy(iov->iov_base, buffer, iov->iov_len); - buffer+= iov->iov_len; - iov++; - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending\n")); - tot_size = size+sizeof(ptl_hdr_t)+sizeof(lgmnal_msghdr_t); - - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_send_to_peer port [%p] buffer [%p] gmsize [%d] msize [%d] global_nid [%u][%x] local_nid[%d] stxd [%p]\n", - nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, global_nid, global_nid, local_nid, stxd)); - LGMNAL_GM_LOCK(nal_data); - gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, GM_LOW_PRIORITY, local_nid, lgmnal_small_tx_done, (void*)stxd); - - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); - - return(PTL_OK); -} - - -void -lgmnal_small_tx_done(gm_port_t *gm_port, void *context, gm_status_t status) -{ - lgmnal_stxd_t *stxd = (lgmnal_stxd_t*)context; - lib_msg_t *cookie = stxd->cookie; - lgmnal_data_t *nal_data = (lgmnal_data_t*)stxd->nal_data; - nal_cb_t *nal_cb = nal_data->nal_cb; - - if (!stxd) { - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("send completion event for unknown stxd\n")); - return; - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Result of send stxd [%p] is [%s]\n", stxd, lgmnal_gm_error(status))); - /* TO DO figure out which sends are worth retrying and get a send token to retry */ - if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Call to lib_finalize failed for stxd [%p]\n", stxd)); - } - lgmnal_return_stxd(nal_data, stxd); - return; -} - - -void -lgmnal_large_tx1_done(gm_port_t *gm_port, void *context, gm_status_t status) -{ - -} - -/* - * Begin a large transmit - */ -int -lgmnal_large_transmit1(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, - ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) -{ - - lgmnal_data_t *nal_data; - lgmnal_stxd_t *stxd = NULL; - void *buffer = NULL; - lgmnal_msghdr_t *msghdr = NULL; - unsigned int local_nid; - int mlen = 0; /* the size of the init message data */ - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_large_transmit1 nal_cb [%p] private [%p], cookie [%p] hdr [%p], type [%d] global_nid [%u], pid [%d], - niov [%d], iov [%p], size [%d]\n", - nal_cb, private, cookie, hdr, type, global_nid, pid, niov, iov, size)); - - if (nal_cb) - 
nal_data = (lgmnal_data_t*)nal_cb->nal_data; - else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_cb.\n")); - return(LGMNAL_STATUS_FAIL); - } - - - /* - * TO DO large transmit uses stxd. Should it have control descriptor? - */ - stxd = lgmnal_get_stxd(nal_data, 1); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); - - stxd->type = LGMNAL_LARGE_MESSAGE_INIT; - stxd->cookie = cookie; - - /* - * Copy lgmnal_msg_hdr and portals header to the transmit buffer - * Then copy the iov in - */ - buffer = stxd->buffer; - msghdr = (lgmnal_msghdr_t*)buffer; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); - - msghdr->magic = LGMNAL_MAGIC; - msghdr->type = LGMNAL_LARGE_MESSAGE_INIT; - msghdr->sender_node_id = nal_data->gm_global_nid; - msghdr->stxd = stxd; - buffer += sizeof(lgmnal_msghdr_t); - mlen = sizeof(lgmnal_msghdr_t); - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); - - gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); - buffer += sizeof(ptl_hdr_t); - mlen += sizeof(ptl_hdr_t); - - /* - * Store the iovs in the stxd for we can get them later - * in large_transmit2 - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Copying iov [%p] to [%p]\n", iov, stxd->iov)); - gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec)); - stxd->niov = niov; - - /* - * Send the init message to the target - */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending mlen [%d]\n", mlen)); - LGMNAL_GM_LOCK(nal_data); - gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, mlen, GM_LOW_PRIORITY, local_nid, lgmnal_large_tx1_done, (void*)stxd); - LGMNAL_GM_UNLOCK(nal_data); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); - - return(PTL_OK); -} - - - - -EXPORT_SYMBOL(lgmnal_requeue_rxbuffer); -EXPORT_SYMBOL(lgmnal_badrx_message); -EXPORT_SYMBOL(lgmnal_large_tx1_done); -EXPORT_SYMBOL(lgmnal_large_transmit1); -EXPORT_SYMBOL(lgmnal_small_receive1); -EXPORT_SYMBOL(lgmnal_small_receive2); -EXPORT_SYMBOL(lgmnal_receive_thread); -EXPORT_SYMBOL(lgmnal_small_transmit); -EXPORT_SYMBOL(lgmnal_small_tx_done); diff --git a/lnet/klnds/lgmlnd/lgmnal_module.c b/lnet/klnds/lgmlnd/lgmnal_module.c deleted file mode 100644 index ce870f0..0000000 --- a/lnet/klnds/lgmlnd/lgmnal_module.c +++ /dev/null @@ -1,137 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#include "lgmnal.h" - - -ptl_handle_ni_t lgmnal_ni; - - -int -lgmnal_cmd(struct portal_ioctl_data *data, void *private) -{ - lgmnal_data_t *nal_data = NULL; - char *name = NULL; - int nid = -2; - int gnid; - gm_status_t gm_status; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cmd [d] private [%p]\n", data->ioc_nal_cmd, private)); - nal_data = (lgmnal_data_t*)private; - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("nal_data is [%p]\n", nal_data)); - switch(data->ioc_nal_cmd) { - /* - * just reuse already defined GET_NID. Should define LGMNAL version - */ - case(LGMNAL_IOC_GET_GNID): - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_cmd GETNID (Get GM Global Network Id\n")); - - PORTAL_ALLOC(name, data->ioc_plen1); - copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1); - - LGMNAL_GM_LOCK(nal_data); - nid = gm_host_name_to_node_id(nal_data->gm_port, name); - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%d]\n", nid)); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_node_id_to_global_id(nal_data->gm_port, nid, &gnid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_node_id_to_global_id failed\n", gm_status)); - return(-1); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node is is [%u][%x]\n", gnid, gnid)); - copy_to_user(data->ioc_pbuf2, &gnid, data->ioc_plen2); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_cmd UNKNOWN[%d]\n", data->ioc_nal_cmd)); - data->ioc_nid2 = -1; - } - - - return(0); -} - -int lgmnal_small_msg_size = 81920; -int lgmnal_debug_level = 1; - -int -init_module() -{ - int status; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("This is the lgmnal module initialisation routine\n")); - - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling lgmnal_init\n")); - status = PtlNIInit(lgmnal_init, 32, 4, 0, &lgmnal_ni); - if (status == PTL_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL initialised ok lgmnal_ni [%lx]\n", lgmnal_ni)); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL Failed to initialise\n")); - return(1); - - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kportal_nal_register\n")); - /* - * global_nal_data is set by lgmnal_init - */ - if (kportal_nal_register(LGMNAL, &lgmnal_cmd, global_nal_data) != 0) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("kportal_nal_register failed\n")); - return(1); - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling PORTAL_SYMBOL_REGISTER\n")); - PORTAL_SYMBOL_REGISTER(lgmnal_ni); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is the end of the lgmnal module initialisation routine")); - - - return(0); -} - - -void cleanup_module() -{ - int interface=0; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Cleaning up lgmnal module")); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Interface [%d] Calling shutdown\n", interface)); - kportal_nal_unregister(LGMNAL); - PORTAL_SYMBOL_UNREGISTER(lgmnal_ni); - lgmnal_fini(); - global_nal_data = NULL; - return; -} - - -EXPORT_SYMBOL(lgmnal_ni); -EXPORT_SYMBOL(lgmnal_debug_level); - -MODULE_PARM(lgmnal_small_msg_size, "i"); -MODULE_PARM(lgmnal_debug_level, "i"); - -MODULE_AUTHOR("Morgan Doyle. morgan.doyle@hp.com"); - -MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM2. 
[0= DEFAULT_LEN) { - PORTAL_ALLOC(varbuf, len+1+16); - if (!varbuf) { - printk("LustreError: lgmnal_cb_printf Failed to malloc\n"); - printk("Lustre: Truncated message is\n"); - printk(fixedbuf); - va_end(ap); - return; - } - sprintf(varbuf, "Lustre: LGMNAL::"); - len = vsnprintf(varbuf+16, len+1, fmt, ap); - } else { - varbuf = fixedbuf; - } - va_end(ap); - printk(varbuf); - if (fixedbuf != varbuf) - PORTAL_FREE(varbuf, len+1+16); - return; -} - - -/* - * allocate a number of small tx buffers and register with GM - * so they are wired and set up for DMA. This is a costly operation. - * Also allocate a corrosponding descriptor to keep track of - * the buffer. - * Put all descriptors on singly linked list to be available to send function. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the txd spinlock. - */ -int -lgmnal_alloc_stxd(lgmnal_data_t *nal_data) -{ - int ntx = 0, nstx = 0, i = 0; - lgmnal_stxd_t *txd = NULL; - void *txbuffer = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small tx\n")); - - LGMNAL_GM_LOCK(nal_data); - ntx = gm_num_send_tokens(nal_data->gm_port); - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of send tokens available is [%d]\n", ntx)); - - nstx = ntx/2; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] send tokens to small messages\n", nstx)); - - -#ifdef LGMNAL_USE_GM_HASH - nal_data->stxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nstx, 0); - if (!nal_data->srxd_hash) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n\n")); - return(LGMNAL_STATUS_NOMEM); - } -#else - nal_data->stxd_hash = NULL; -#endif - - /* - * A semaphore is initialised with the - * number of transmit tokens available. - * To get a stxd, acquire the token semaphore. 
- * this decrements the available token count - * (if no tokens you block here, someone returning a - * stxd will release the semaphore and wake you) - * When token is obtained acquire the spinlock - * to manipulate the list - */ - LGMNAL_TXD_TOKEN_INIT(nal_data, nstx); - LGMNAL_TXD_LOCK_INIT(nal_data); - - for (i=0; i<=nstx; i++) { - PORTAL_ALLOC(txd, sizeof(lgmnal_stxd_t)); - if (!txd) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txd [%d]\n", i)); - return(LGMNAL_STATUS_NOMEM); - } -#if 0 - PORTAL_ALLOC(txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - if (!txbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); - return(LGMNAL_STATUS_FAIL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] txbuffer [%p], size [%d]\n", - nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_register_memory(nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", txbuffer, i)); - switch(gm_status) { - case(GM_FAILURE): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); - break; - case(GM_PERMISSION_DENIED): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); - break; - case(GM_INVALID_PARAMETER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown error\n")); - break; - } - return(LGMNAL_STATUS_FAIL); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_register_memory ok for buffer [%p], index [%d]\n", txbuffer, i)); - } -#else - LGMNAL_GM_LOCK(nal_data); - txbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (!txbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); - return(LGMNAL_STATUS_FAIL); - } -#endif - - txd->buffer = txbuffer; - txd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); - txd->gmsize = gm_min_size_for_length(txd->size); - txd->nal_data = (struct _lgmnal_data_t*)nal_data; - - if (lgmnal_hash_add(&nal_data->stxd_hash, (void*)txbuffer, (void*)txd)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry\n")); - return(LGMNAL_STATUS_FAIL); - } - - - txd->next = nal_data->stxd; - nal_data->stxd = txd; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); - } - - return(LGMNAL_STATUS_OK); -} - -/* Free the list of wired and gm_registered small tx buffers and the tx descriptors - that go along with them. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the txd spinlock. 
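 *
 * For contrast, the normal transmit path does take the token and then the
 * list lock, via lgmnal_get_stxd()/lgmnal_return_stxd().  A rough caller
 * sketch (illustrative only, mirroring lgmnal_small_transmit earlier in
 * this patch):
 *
 *	stxd = lgmnal_get_stxd(nal_data, 1);	 may block on the token
 *	... build msghdr, portals hdr and payload in stxd->buffer ...
 *	gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer,
 *	                              stxd->gmsize, tot_size, GM_LOW_PRIORITY,
 *	                              local_nid, lgmnal_small_tx_done, stxd);
 *	... lgmnal_small_tx_done() returns the stxd when the send completes ...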
- */ -void -lgmnal_free_stxd(lgmnal_data_t *nal_data) -{ - lgmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small tx\n")); - - while(txd) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); - _txd = txd; - txd = txd->next; -#if 0 - LGMNAL_GM_LOCK(nal_data); - gm_deregister_memory(nal_data->gm_port, _txd->buffer, _txd->size); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(_txd->buffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); -#else - LGMNAL_GM_LOCK(nal_data); - gm_dma_free(nal_data->gm_port, _txd->buffer); - LGMNAL_GM_UNLOCK(nal_data); -#endif - PORTAL_FREE(_txd, sizeof(lgmnal_stxd_t)); - } - return; -} - - -/* - * Get a txd from the list - * This get us a wired and gm_registered small tx buffer. - * This implicitly gets us a send token also. - */ -lgmnal_stxd_t * -lgmnal_get_stxd(lgmnal_data_t *nal_data, int block) -{ - - lgmnal_stxd_t *txd = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_stxd nal_data [%p] block[%d]\n", - nal_data, block)); - - if (block) { - LGMNAL_TXD_GETTOKEN(nal_data); - } else { - if (LGMNAL_TXD_TRYGETTOKEN(nal_data)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_stxd can't get token\n")); - return(NULL); - } - } - LGMNAL_TXD_LOCK(nal_data); - txd = nal_data->stxd; - if (txd) - nal_data->stxd = txd->next; - LGMNAL_TXD_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_stxd got [%p], head is [%p]\n", txd, nal_data->stxd)); - return(txd); -} - -/* - * Return a txd to the list - */ -void -lgmnal_return_stxd(lgmnal_data_t *nal_data, lgmnal_stxd_t *txd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_stxd nal_data [%p], txd[%p]\n", nal_data, txd)); - - LGMNAL_TXD_LOCK(nal_data); - txd->next = nal_data->stxd; - nal_data->stxd = txd; - LGMNAL_TXD_UNLOCK(nal_data); - LGMNAL_TXD_RETURNTOKEN(nal_data); - return; -} - - -/* - * allocate a number of small rx buffers and register with GM - * so they are wired and set up for DMA. This is a costly operation. - * Also allocate a corrosponding descriptor to keep track of - * the buffer. - * Put all descriptors on singly linked list to be available to receive thread. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the rxd spinlock. 
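 *
 * Once the small-receive path has consumed a message the buffer goes straight
 * back to GM; a sketch of the re-post (this is what lgmnal_small_receive2
 * does earlier in this patch):
 *
 *	LGMNAL_GM_LOCK(nal_data);
 *	gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
 *	                                   srxd->gmsize, GM_LOW_PRIORITY, 0);
 *	LGMNAL_GM_UNLOCK(nal_data);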
- */ -int -lgmnal_alloc_srxd(lgmnal_data_t *nal_data) -{ - int nrx = 0, nsrx = 0, i = 0; - lgmnal_srxd_t *rxd = NULL; - void *rxbuffer = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small rx\n")); - - LGMNAL_GM_LOCK(nal_data); - nrx = gm_num_receive_tokens(nal_data->gm_port); - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of receive tokens available is [%d]\n", nrx)); - - nsrx = nrx/2; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] receive tokens to small messages\n", nsrx)); - - -#ifdef LGMNAL_USE_GM_HASH - LGMNAL_GM_LOCK(nal_data); - nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nsrx, 0); - LGMNAL_GM_UNLOCK(nal_data); - if (!nal_data->srxd_hash) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n")); - return(LGMNAL_STATUS_NOMEM); - } -#else - nal_data->srxd_hash = NULL; -#endif - - LGMNAL_RXD_TOKEN_INIT(nal_data, nsrx); - LGMNAL_RXD_LOCK_INIT(nal_data); - - for (i=0; i<=nsrx; i++) { - PORTAL_ALLOC(rxd, sizeof(lgmnal_srxd_t)); - if (!rxd) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxd [%d]\n", i)); - return(LGMNAL_STATUS_NOMEM); - } -#if 0 - PORTAL_ALLOC(rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - if (!rxbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); - return(LGMNAL_STATUS_FAIL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] rxbuffer [%p], size [%d]\n", - nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", rxbuffer, i)); - switch(gm_status) { - case(GM_FAILURE): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); - break; - case(GM_PERMISSION_DENIED): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); - break; - case(GM_INVALID_PARAMETER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown GM error[%d]\n", gm_status)); - break; - - } - return(LGMNAL_STATUS_FAIL); - } -#else - LGMNAL_GM_LOCK(nal_data); - rxbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (!rxbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); - return(LGMNAL_STATUS_FAIL); - } -#endif - - rxd->buffer = rxbuffer; - rxd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); - rxd->gmsize = gm_min_size_for_length(rxd->size); - - if (lgmnal_hash_add(&nal_data->srxd_hash, (void*)rxbuffer, (void*)rxd) != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry rxd[%p] for rxbuffer[%p]\n", rxd, rxbuffer)); - return(LGMNAL_STATUS_FAIL); - } - - rxd->next = nal_data->srxd; - nal_data->srxd = rxd; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); - } - - return(LGMNAL_STATUS_OK); -} - - - -/* Free the list of wired and gm_registered small rx buffers and the rx descriptors - * that go along with them. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the rxd spinlock. 
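 *
 * While the pool is live, the hash built in lgmnal_alloc_srxd() above is what
 * maps a buffer address reported by GM back to its descriptor.  A minimal
 * lookup sketch (lgmnal_rxbuffer_to_srxd() below exists for exactly this):
 *
 *	buffer = gm_ntohp(recv->buffer);
 *	srxd   = lgmnal_rxbuffer_to_srxd(nal_data, buffer);
 *	if (!srxd)
 *	        ... unknown buffer, treat as a bad receive ...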
- */ -void -lgmnal_free_srxd(lgmnal_data_t *nal_data) -{ - lgmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small rx\n")); - - while(rxd) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); - _rxd = rxd; - rxd = rxd->next; - -#if 0 - LGMNAL_GM_LOCK(nal_data); - gm_deregister_memory(nal_data->gm_port, _rxd->buffer, _rxd->size); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(_rxd->buffer, LGMNAL_SMALL_RXBUFFER_SIZE); -#else - LGMNAL_GM_LOCK(nal_data); - gm_dma_free(nal_data->gm_port, _rxd->buffer); - LGMNAL_GM_UNLOCK(nal_data); -#endif - PORTAL_FREE(_rxd, sizeof(lgmnal_srxd_t)); - } - return; -} - - -/* - * Get a rxd from the free list - * This get us a wired and gm_registered small rx buffer. - * This implicitly gets us a receive token also. - */ -lgmnal_srxd_t * -lgmnal_get_srxd(lgmnal_data_t *nal_data, int block) -{ - - lgmnal_srxd_t *rxd = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_srxd nal_data [%p] block [%d]\n", nal_data, block)); - - if (block) { - LGMNAL_RXD_GETTOKEN(nal_data); - } else { - if (LGMNAL_RXD_TRYGETTOKEN(nal_data)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_srxd Can't get token\n")); - return(NULL); - } - } - LGMNAL_RXD_LOCK(nal_data); - rxd = nal_data->srxd; - if (rxd) - nal_data->srxd = rxd->next; - LGMNAL_RXD_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_srxd got [%p], head is [%p]\n", rxd, nal_data->srxd)); - return(rxd); -} - -/* - * Return an rxd to the list - */ -void -lgmnal_return_srxd(lgmnal_data_t *nal_data, lgmnal_srxd_t *rxd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_srxd nal_data [%p], rxd[%p]\n", nal_data, rxd)); - - LGMNAL_RXD_LOCK(nal_data); - rxd->next = nal_data->srxd; - nal_data->srxd = rxd; - LGMNAL_RXD_UNLOCK(nal_data); - LGMNAL_RXD_RETURNTOKEN(nal_data); - return; -} - -/* - * Given a pointer to a srxd find - * the relevant descriptor for it - * This is done by searching a hash - * list that is created when the srxd's - * are created - */ -lgmnal_srxd_t * -lgmnal_rxbuffer_to_srxd(lgmnal_data_t *nal_data, void *rxbuffer) -{ - lgmnal_srxd_t *srxd = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_rxbuffer_to_srxd nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer)); -#ifdef LGMNAL_USE_GM_HASH - srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer); -#else - srxd = lgmnal_hash_find(nal_data->srxd_hash, rxbuffer); -#endif - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("srxd is [%p]\n", srxd)); - return(srxd); -} - - -void -lgmnal_stop_rxthread(lgmnal_data_t *nal_data) -{ - int delay = 15; - - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Attempting to stop rxthread nal_data [%p]\n", nal_data)); - - if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("thread flag not correctly set\n")); - } - - nal_data->rxthread_flag = LGMNAL_THREAD_STOP; - LGMNAL_GM_LOCK(nal_data); - gm_set_alarm(nal_data->gm_port, &nal_data->rxthread_alarm, 10, NULL, NULL); - LGMNAL_GM_UNLOCK(nal_data); - - while(nal_data->rxthread_flag == LGMNAL_THREAD_STOP && delay--) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_stop_rxthread sleeping\n")); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1024); - } - - if (nal_data->rxthread_flag == LGMNAL_THREAD_STOP) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("I DON'T KNOW HOW TO WAKE THE THREAD\n")); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RX THREAD SEEMS TO HAVE STOPPED\n")); - } - -} - - - -char * -lgmnal_gm_error(gm_status_t status) -{ - switch(status) { - 
case(GM_SUCCESS): - return("SUCCESS"); - case(GM_FAILURE): - return("FAILURE"); - case(GM_INPUT_BUFFER_TOO_SMALL): - return("INPUT_BUFFER_TOO_SMALL"); - case(GM_OUTPUT_BUFFER_TOO_SMALL): - return("OUTPUT_BUFFER_TOO_SMALL"); - case(GM_TRY_AGAIN ): - return("TRY_AGAIN"); - case(GM_BUSY): - return("BUSY"); - case(GM_MEMORY_FAULT): - return("MEMORY_FAULT"); - case(GM_INTERRUPTED): - return("INTERRUPTED"); - case(GM_INVALID_PARAMETER): - return("INVALID_PARAMETER"); - case(GM_OUT_OF_MEMORY): - return("OUT_OF_MEMORY"); - case(GM_INVALID_COMMAND): - return("INVALID_COMMAND"); - case(GM_PERMISSION_DENIED): - return("PERMISSION_DENIED"); - case(GM_INTERNAL_ERROR): - return("INTERNAL_ERROR"); - case(GM_UNATTACHED): - return("UNATTACHED"); - case(GM_UNSUPPORTED_DEVICE): - return("UNSUPPORTED_DEVICE"); - case(GM_SEND_TIMED_OUT): - return("GM_SEND_TIMEDOUT"); - case(GM_SEND_REJECTED): - return("GM_SEND_REJECTED"); - case(GM_SEND_TARGET_PORT_CLOSED): - return("GM_SEND_TARGET_PORT_CLOSED"); - case(GM_SEND_TARGET_NODE_UNREACHABLE): - return("GM_SEND_TARGET_NODE_UNREACHABLE"); - case(GM_SEND_DROPPED): - return("GM_SEND_DROPPED"); - case(GM_SEND_PORT_CLOSED): - return("GM_SEND_PORT_CLOSED"); - case(GM_NODE_ID_NOT_YET_SET): - return("GM_NODE_ID_NOT_YET_SET"); - case(GM_STILL_SHUTTING_DOWN): - return("GM_STILL_SHUTTING_DOWN"); - case(GM_CLONE_BUSY): - return("GM_CLONE_BUSY"); - case(GM_NO_SUCH_DEVICE): - return("GM_NO_SUCH_DEVICE"); - case(GM_ABORTED): - return("GM_ABORTED"); - case(GM_INCOMPATIBLE_LIB_AND_DRIVER): - return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); - case(GM_UNTRANSLATED_SYSTEM_ERROR): - return("GM_UNTRANSLATED_SYSTEM_ERROR"); - case(GM_ACCESS_DENIED): - return("GM_ACCESS_DENIED"); - - -/* - * These ones are in the docs but aren't in the header file - case(GM_DEV_NOT_FOUND): - return("GM_DEV_NOT_FOUND"); - case(GM_INVALID_PORT_NUMBER): - return("GM_INVALID_PORT_NUMBER"); - case(GM_UC_ERROR): - return("GM_US_ERROR"); - case(GM_PAGE_TABLE_FULL): - return("GM_PAGE_TABLE_FULL"); - case(GM_MINOR_OVERFLOW): - return("GM_MINOR_OVERFLOW"); - case(GM_SEND_ORPHANED): - return("GM_SEND_ORPHANED"); - case(GM_HARDWARE_FAULT): - return("GM_HARDWARE_FAULT"); - case(GM_DATA_CORRUPTED): - return("GM_DATA_CORRUPTED"); - case(GM_TIMED_OUT): - return("GM_TIMED_OUT"); - case(GM_USER_ERROR): - return("GM_USER_ERROR"); - case(GM_NO_MATCH): - return("GM_NOMATCH"); - case(GM_NOT_SUPPORTED_IN_KERNEL): - return("GM_NOT_SUPPORTED_IN_KERNEL"); - case(GM_NOT_SUPPORTED_ON_ARCH): - return("GM_NOT_SUPPORTED_ON_ARCH"); - case(GM_PTE_REF_CNT_OVERFLOW): - return("GM_PTR_REF_CNT_OVERFLOW"); - case(GM_NO_DRIVER_SUPPORT): - return("GM_NO_DRIVER_SUPPORT"); - case(GM_FIRMWARE_NOT_RUNNING): - return("GM_FIRMWARE_NOT_RUNNING"); - - * These ones are in the docs but aren't in the header file - */ - default: - return("UNKNOWN GM ERROR CODE"); - } -} - - -char * -lgmnal_rxevent(gm_recv_event_t *ev) -{ - short event; - char msg[24]; - event = GM_RECV_EVENT_TYPE(ev); - switch(event) { - case(GM_NO_RECV_EVENT): - return("GM_NO_RECV_EVENT"); - case(GM_SENDS_FAILED_EVENT): - return("GM_SEND_FAILED_EVENT"); - case(GM_ALARM_EVENT): - return("GM_ALARM_EVENT"); - case(GM_SENT_EVENT): - return("GM_SENT_EVENT"); - case(_GM_SLEEP_EVENT): - return("_GM_SLEEP_EVENT"); - case(GM_RAW_RECV_EVENT): - return("GM_RAW_RECV_EVENT"); - case(GM_BAD_SEND_DETECTED_EVENT): - return("GM_BAD_SEND_DETECTED_EVENT"); - case(GM_SEND_TOKEN_VIOLATION_EVENT): - return("GM_SEND_TOKEN_VIOLATION_EVENT"); - case(GM_RECV_TOKEN_VIOLATION_EVENT): - 
return("GM_RECV_TOKEN_VIOLATION_EVENT"); - case(GM_BAD_RECV_TOKEN_EVENT): - return("GM_BAD_RECV_TOKEN_EVENT"); - case(GM_ALARM_VIOLATION_EVENT): - return("GM_ALARM_VIOLATION_EVENT"); - case(GM_RECV_EVENT): - return("GM_RECV_EVENT"); - case(GM_HIGH_RECV_EVENT): - return("GM_HIGH_RECV_EVENT"); - case(GM_PEER_RECV_EVENT): - return("GM_PEER_RECV_EVENT"); - case(GM_HIGH_PEER_RECV_EVENT): - return("GM_HIGH_PEER_RECV_EVENT"); - case(GM_FAST_RECV_EVENT): - return("GM_FAST_RECV_EVENT"); - case(GM_FAST_HIGH_RECV_EVENT): - return("GM_FAST_HIGH_RECV_EVENT"); - case(GM_FAST_PEER_RECV_EVENT): - return("GM_FAST_PEER_RECV_EVENT"); - case(GM_FAST_HIGH_PEER_RECV_EVENT): - return("GM_FAST_HIGH_PEER_RECV_EVENT"); - case(GM_REJECTED_SEND_EVENT): - return("GM_REJECTED_SEND_EVENT"); - case(GM_ORPHANED_SEND_EVENT): - return("GM_ORPHANED_SEND_EVENT"); - case(GM_BAD_RESEND_DETECTED_EVENT): - return("GM_BAD_RESEND_DETETED_EVENT"); - case(GM_DROPPED_SEND_EVENT): - return("GM_DROPPED_SEND_EVENT"); - case(GM_BAD_SEND_VMA_EVENT): - return("GM_BAD_SEND_VMA_EVENT"); - case(GM_BAD_RECV_VMA_EVENT): - return("GM_BAD_RECV_VMA_EVENT"); - case(_GM_FLUSHED_ALARM_EVENT): - return("GM_FLUSHED_ALARM_EVENT"); - case(GM_SENT_TOKENS_EVENT): - return("GM_SENT_TOKENS_EVENTS"); - case(GM_IGNORE_RECV_EVENT): - return("GM_IGNORE_RECV_EVENT"); - case(GM_ETHERNET_RECV_EVENT): - return("GM_ETHERNET_RECV_EVENT"); - case(GM_NEW_NO_RECV_EVENT): - return("GM_NEW_NO_RECV_EVENT"); - case(GM_NEW_SENDS_FAILED_EVENT): - return("GM_NEW_SENDS_FAILED_EVENT"); - case(GM_NEW_ALARM_EVENT): - return("GM_NEW_ALARM_EVENT"); - case(GM_NEW_SENT_EVENT): - return("GM_NEW_SENT_EVENT"); - case(_GM_NEW_SLEEP_EVENT): - return("GM_NEW_SLEEP_EVENT"); - case(GM_NEW_RAW_RECV_EVENT): - return("GM_NEW_RAW_RECV_EVENT"); - case(GM_NEW_BAD_SEND_DETECTED_EVENT): - return("GM_NEW_BAD_SEND_DETECTED_EVENT"); - case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): - return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); - case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): - return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); - case(GM_NEW_BAD_RECV_TOKEN_EVENT): - return("GM_NEW_BAD_RECV_TOKEN_EVENT"); - case(GM_NEW_ALARM_VIOLATION_EVENT): - return("GM_NEW_ALARM_VIOLATION_EVENT"); - case(GM_NEW_RECV_EVENT): - return("GM_NEW_RECV_EVENT"); - case(GM_NEW_HIGH_RECV_EVENT): - return("GM_NEW_HIGH_RECV_EVENT"); - case(GM_NEW_PEER_RECV_EVENT): - return("GM_NEW_PEER_RECV_EVENT"); - case(GM_NEW_HIGH_PEER_RECV_EVENT): - return("GM_NEW_HIGH_PEER_RECV_EVENT"); - case(GM_NEW_FAST_RECV_EVENT): - return("GM_NEW_FAST_RECV_EVENT"); - case(GM_NEW_FAST_HIGH_RECV_EVENT): - return("GM_NEW_FAST_HIGH_RECV_EVENT"); - case(GM_NEW_FAST_PEER_RECV_EVENT): - return("GM_NEW_FAST_PEER_RECV_EVENT"); - case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): - return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); - case(GM_NEW_REJECTED_SEND_EVENT): - return("GM_NEW_REJECTED_SEND_EVENT"); - case(GM_NEW_ORPHANED_SEND_EVENT): - return("GM_NEW_ORPHANED_SEND_EVENT"); - case(_GM_NEW_PUT_NOTIFICATION_EVENT): - return("_GM_NEW_PUT_NOTIFICATION_EVENT"); - case(GM_NEW_FREE_SEND_TOKEN_EVENT): - return("GM_NEW_FREE_SEND_TOKEN_EVENT"); - case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): - return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); - case(GM_NEW_BAD_RESEND_DETECTED_EVENT): - return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); - case(GM_NEW_DROPPED_SEND_EVENT): - return("GM_NEW_DROPPED_SEND_EVENT"); - case(GM_NEW_BAD_SEND_VMA_EVENT): - return("GM_NEW_BAD_SEND_VMA_EVENT"); - case(GM_NEW_BAD_RECV_VMA_EVENT): - return("GM_NEW_BAD_RECV_VMA_EVENT"); - case(_GM_NEW_FLUSHED_ALARM_EVENT): - 
return("GM_NEW_FLUSHED_ALARM_EVENT"); - case(GM_NEW_SENT_TOKENS_EVENT): - return("GM_NEW_SENT_TOKENS_EVENT"); - case(GM_NEW_IGNORE_RECV_EVENT): - return("GM_NEW_IGNORE_RECV_EVENT"); - case(GM_NEW_ETHERNET_RECV_EVENT): - return("GM_NEW_ETHERNET_RECV_EVENT"); - default: - snprintf(msg, 24, "Unknown Recv event [%d]", event); - return(msg); -#if 0 - case(/* _GM_PUT_NOTIFICATION_EVENT */ - case(/* GM_FREE_SEND_TOKEN_EVENT */ - case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */ -#endif - } -} - - -void -lgmnal_yield(int delay) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(delay); -} - -int -lgmnal_is_small_message(lgmnal_data_t *nal_data, int niov, struct iovec *iov, int len) -{ - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_is_small_message len is [%d]\n", len)); - if (len < LGMNAL_SMALL_MSG_SIZE(nal_data)) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Yep, small message]\n")); - return(1); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No, not small message]\n")); - return(0); - } -} - -void * -lgmnal_hash_find(lgmnal_hash_t *hash, void *key) -{ - void *data = NULL; - int count = 0; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_find hash [%p] key [%p]\n", hash, key)); - - while (hash) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find Stepping [%d]\n", count++)); - if (hash->key == key) { - data = hash->data; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find hash got data[%p]\n", data)); - return(data); - } else - hash = hash->next; - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find data not found\n")); - return(NULL); -} - -/* - * TO DO hash. figure out why getting bad stuff from gm_hash and thne use it. - */ - -int -lgmnal_hash_add(lgmnal_hash_t **hash, void *key, void *data) -{ - -#ifdef LGMNAL_USE_GM_HASH - return(gm_hash_insert(*hash, (void*)key, (void*)data); -#else - lgmnal_hash_t *new = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_add hash [%p]\n", *hash)); - PORTAL_ALLOC(new, sizeof(lgmnal_hash_t)); - memset(new, 0, sizeof(lgmnal_hash_t)); - if (!new) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_hash_add :: can't get memory\n")); - return(-1); - } - new->data = data; - new->key = key; - new->next = *hash; - *hash = new; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_add hash head [%p]\n", *hash)); - return(0); -#endif -} - -void -lgmnal_hash_free(lgmnal_hash_t **hash) -{ - - lgmnal_hash_t *_hash = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_free hash [p%]\n", *hash)); - - while (*hash) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_free freeing hash [p%]\n", _hash)); - _hash = *hash; - *hash = _hash->next; - PORTAL_FREE(_hash, sizeof(lgmnal_hash_t)); - } - return; -} - - -EXPORT_SYMBOL(lgmnal_yield); -EXPORT_SYMBOL(lgmnal_print); -EXPORT_SYMBOL(lgmnal_alloc_srxd); -EXPORT_SYMBOL(lgmnal_get_srxd); -EXPORT_SYMBOL(lgmnal_return_srxd); -EXPORT_SYMBOL(lgmnal_free_srxd); -EXPORT_SYMBOL(lgmnal_alloc_stxd); -EXPORT_SYMBOL(lgmnal_get_stxd); -EXPORT_SYMBOL(lgmnal_return_stxd); -EXPORT_SYMBOL(lgmnal_free_stxd); -EXPORT_SYMBOL(lgmnal_rxbuffer_to_srxd); -EXPORT_SYMBOL(lgmnal_rxevent); -EXPORT_SYMBOL(lgmnal_gm_error); -EXPORT_SYMBOL(lgmnal_stop_rxthread); diff --git a/lnet/tests/startclient.sh b/lnet/tests/startclient.sh index c9b7c16..de01bc7 100644 --- a/lnet/tests/startclient.sh +++ b/lnet/tests/startclient.sh @@ -29,9 +29,16 @@ case "$1" in /sbin/insmod ./$PING echo kqswnal > /tmp/nal ;; + + gm) + /sbin/insmod portals + /sbin/insmod kgmnal + /sbin/insmod ./$PING + echo kgmnal > /tmp/nal + ;; *) - echo "Usage : ${0} < tcp | toe | elan >" + echo "Usage : ${0} < tcp | toe 
| elan | gm>" exit 1; esac exit 0; diff --git a/lnet/tests/startserver.sh b/lnet/tests/startserver.sh index 942300e..4f66eeb 100644 --- a/lnet/tests/startserver.sh +++ b/lnet/tests/startserver.sh @@ -29,9 +29,16 @@ case "$1" in /sbin/insmod ./$PING nal=4 echo kqswnal > /tmp/nal ;; + + gm) + /sbin/insmod portals + /sbin/insmod kgmnal + /sbin/insmod ./$PING nal=3 + echo kgmnal > /tmp/nal + ;; *) - echo "Usage : ${0} < tcp | toe | elan >" + echo "Usage : ${0} < tcp | toe | elan | gm>" exit 1; esac ../utils/acceptor 9999& diff --git a/lnet/utils/.cvsignore b/lnet/utils/.cvsignore index 8e474ad..e2a0d44 100644 --- a/lnet/utils/.cvsignore +++ b/lnet/utils/.cvsignore @@ -6,4 +6,5 @@ ptlctl .deps routerstat wirecheck +gmnalnid .*.cmd diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index d51e3b3..c79909c 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -7,7 +7,7 @@ COMPILE = $(CC) -Wall -g -I$(srcdir)/../include LINK = $(CC) -o $@ -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck +sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid lib_LIBRARIES = libptlctl.a acceptor_SOURCES = acceptor.c # -lefence @@ -16,6 +16,8 @@ wirecheck_SOURCES = wirecheck.c libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h +gmnalnid_SOURCES = gmnalnid.c + ptlctl_SOURCES = ptlctl.c ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence ptlctl_DEPENDENCIES = libptlctl.a diff --git a/lnet/utils/gmlndnid.c b/lnet/utils/gmlndnid.c new file mode 100644 index 0000000..701a814 --- /dev/null +++ b/lnet/utils/gmlndnid.c @@ -0,0 +1,118 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * This file is free software; you can redistribute it and/or + * modify it under the terms of version 2.1 of the GNU Lesser General + * Public License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Portals; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#define GMNAL_IOC_GET_GNID 1 + +int +roundup(int len) +{ + return((len+7) & (~0x7)); +} + +int main(int argc, char **argv) +{ + int rc, pfd; + struct portal_ioctl_data data; + unsigned int nid = 0, len; + char *name = NULL; + int c; + + + + while ((c = getopt(argc, argv, "n:l")) != -1) { + switch(c) { + case('n'): + name = optarg; + break; + case('l'): + printf("Get local id not implemented yet!\n"); + exit(-1); + default: + printf("usage %s -n nodename [-p]\n", argv[0]); + } + } + + if (!name) { + printf("usage %s -n nodename [-p]\n", argv[0]); + exit(-1); + } + + + + PORTAL_IOC_INIT (data); + + /* + * set up the inputs + */ + len = strlen(name) + 1; + data.ioc_pbuf1 = malloc(len); + strcpy(data.ioc_pbuf1, name); + data.ioc_plen1 = len; + + /* + * set up the outputs + */ + data.ioc_pbuf2 = (void*)&nid; + data.ioc_plen2 = sizeof(unsigned int*); + + pfd = open("/dev/portals", O_RDWR); + if ( pfd < 0 ) { + perror("opening portals device"); + free(data.ioc_pbuf1); + exit(-1); + } + + data.ioc_nal = GMNAL; + data.ioc_nal_cmd = GMNAL_IOC_GET_GNID; +/* + data.ioc_len += data.ioc_inllen1; + data.ioc_len += data.ioc_plen1; +*/ + rc = ioctl (pfd, IOC_PORTAL_NAL_CMD, &data); + if (rc < 0) + { + perror ("Can't get my NID"); + } + + free(data.ioc_pbuf1); + close(pfd); + printf("%u\n", nid); + exit(nid); +} diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 41349fd..7910823 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -286,7 +286,7 @@ if test "${with_gm+set}" = set; then if test "${with_gm}" = yes; then with_gm="-I/usr/local/gm/include" else - with_gm=-I"$with_gm/include" + with_gm="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm" fi GMNAL="gmnal" else diff --git a/lustre/portals/knals/gmnal/Makefile.am b/lustre/portals/knals/gmnal/Makefile.am index 1dc6f4e..bac4680 100644 --- a/lustre/portals/knals/gmnal/Makefile.am +++ b/lustre/portals/knals/gmnal/Makefile.am @@ -9,5 +9,5 @@ MODULE = kgmnal modulenet_DATA = kgmnal.o EXTRA_PROGRAMS = kgmnal -DEFS = -kgmnal_SOURCES = gmnal.c gmnal_cb.c gmnal.h +DEFS = -DGM_KERNEL +kgmnal_SOURCES = gmnal.h gmnal_api.c gmnal_cb.c gmnal_comm.c gmnal_utils.c gmnal_module.c diff --git a/lnet/klnds/lgmlnd/Makefile.mk b/lustre/portals/knals/gmnal/Makefile.mk similarity index 63% rename from lnet/klnds/lgmlnd/Makefile.mk rename to lustre/portals/knals/gmnal/Makefile.mk index c8ca67f..b799a47 100644 --- a/lnet/klnds/lgmlnd/Makefile.mk +++ b/lustre/portals/knals/gmnal/Makefile.mk @@ -5,6 +5,6 @@ include ../../Kernelenv -obj-y += lgmnal.o -lgmnal-objs := lgmnal_api.o lgmnal_cb.o lgmnal_utils.o lgmnal_comm.o lgmnal_module.o +obj-y += gmnal.o +gmnal-objs := gmnal_api.o gmnal_cb.o gmnal_utils.o gmnal_comm.o gmnal_module.o diff --git a/lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch b/lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch deleted file mode 100644 index 23c80d9..0000000 --- a/lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch +++ /dev/null @@ -1,43 +0,0 @@ -diff -ru gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c ---- gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c Mon Jul 1 10:35:09 2002 -+++ gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c Thu Sep 19 14:19:38 2002 -@@ -30,6 +30,8 @@ - * - ************************************************************************/ - -+#define EXPORT_SYMTAB 
-+ - #include - #include - -@@ -4075,6 +4077,28 @@ - return 0; - } - -+EXPORT_SYMBOL(gm_blocking_receive_no_spin); -+EXPORT_SYMBOL(gm_close); -+EXPORT_SYMBOL(gm_dma_free); -+EXPORT_SYMBOL(gm_dma_malloc); -+EXPORT_SYMBOL(gm_drop_sends); -+EXPORT_SYMBOL(gm_finalize); -+EXPORT_SYMBOL(gm_get_node_id); -+EXPORT_SYMBOL(gm_init); -+EXPORT_SYMBOL(gm_initialize_alarm); -+EXPORT_SYMBOL(gm_max_node_id_in_use); -+EXPORT_SYMBOL(gm_min_size_for_length); -+EXPORT_SYMBOL(gm_num_receive_tokens); -+EXPORT_SYMBOL(gm_num_send_tokens); -+EXPORT_SYMBOL(gm_open); -+EXPORT_SYMBOL(gm_provide_receive_buffer); -+EXPORT_SYMBOL(gm_resume_sending); -+EXPORT_SYMBOL(gm_send_with_callback); -+EXPORT_SYMBOL(gm_set_acceptable_sizes); -+EXPORT_SYMBOL(gm_set_alarm); -+EXPORT_SYMBOL(gm_unknown); -+ -+ - /* - This file uses GM standard indentation. - -Only in gm-1.5.2.1_Linux-cfs/drivers/linux/gm: gm_arch.c~ -Only in gm-1.5.2.1_Linux-cfs/: trace diff --git a/lustre/portals/knals/gmnal/gmnal.c b/lustre/portals/knals/gmnal/gmnal.c deleted file mode 100644 index 24708f7..0000000 --- a/lustre/portals/knals/gmnal/gmnal.c +++ /dev/null @@ -1,284 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Based on ksocknal and qswnal - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "gmnal.h" - -ptl_handle_ni_t kgmnal_ni; -nal_t kgmnal_api; - -kgmnal_data_t kgmnal_data; -int gmnal_debug = 0; - -kpr_nal_interface_t kqswnal_router_interface = { - kprni_nalid: GMNAL, - kprni_arg: NULL, - kprni_fwd: kgmnal_fwd_packet, -}; - -static int kgmnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return PTL_OK; -} - -static void kgmnal_lock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - -static void kgmnal_unlock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - -static int kgmnal_shutdown(nal_t *nal, int ni) -{ - LASSERT (nal == &kgmnal_api); - return 0; -} - -static void kgmnal_yield( nal_t *nal ) -{ - LASSERT (nal == &kgmnal_api); - - if (current->need_resched) - schedule(); - return; -} - -kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *data,int ndx) -{ - kgmnal_rx_t *conn; - - PORTAL_ALLOC(conn, sizeof(kgmnal_rx_t)); - /* Check for out of mem here */ - if (conn==NULL) { - printk("LustreError: kgm_add_recv: memory alloc failed\n"); - return NULL; - } - - list_add(&conn->krx_item,(struct list_head *)&data->kgm_list); - // conn->ndx=ndx; - // conn->len=conn->ptlhdr_copied=0; - // conn->loopback=0; - return conn; -} - -static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - unsigned int nnids; - - gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - CDEBUG(D_NET, "calling lib_init with nid 0x%Lx of %d\n", - kgmnal_data.kgm_nid, nnids); - lib_init(&kgmnal_lib, kgmnal_data.kgm_nid, 0, nnids,ptl_size, ac_size); - return &kgmnal_api; -} - -static void /*__exit*/ -kgmnal_finalize(void) -{ - struct list_head *tmp; - - PORTAL_SYMBOL_UNREGISTER (kgmnal_ni); - PtlNIFini(kgmnal_ni); - lib_fini(&kgmnal_api); - - if (kgmnal_data.kgm_port) { - gm_close(kgmnal_data.kgm_port); - } - - /* FIXME: free dma buffers */ - /* FIXME: kill receiver thread */ - - PORTAL_FREE (kgmnal_data.kgm_trans, bsizeof(kgmnal_tx_t)*TXMSGS); - - list_for_each(tmp, &kgmnal_data.kgm_list) { - kgmnal_rx_t *conn; - conn = list_entry(tmp, kgmnal_rx_t, krx_item); - CDEBUG(D_IOCTL, "freeing conn %p\n",conn); - tmp = tmp->next; - list_del(&conn->krx_item); - PORTAL_FREE(conn, sizeof(*conn)); - } - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - -static int __init -kgmnal_initialize(void) -{ - int rc; - int ntok; - unsigned long sizemask; - unsigned int nid; - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kgmnal_api.forward = kgmnal_forward; - kgmnal_api.shutdown = kgmnal_shutdown; - kgmnal_api.yield = kgmnal_yield; - kgmnal_api.validate = NULL; /* our api validate is a NOOP */ - kgmnal_api.lock= kgmnal_lock; - kgmnal_api.unlock= kgmnal_unlock; - kgmnal_api.nal_data = &kgmnal_data; - - kgmnal_lib.nal_data = &kgmnal_data; - - memset(&kgmnal_data, 0, sizeof(kgmnal_data)); - - INIT_LIST_HEAD(&kgmnal_data.kgm_list); - 
kgmnal_data.kgm_cb = &kgmnal_lib; - - /* Allocate transmit descriptors */ - PORTAL_ALLOC (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS); - if (kgmnal_data.kgm_trans==NULL) { - printk("LustreError: kgmnal: init: failed to allocate transmit " - "descriptors\n"); - return -1; - } - memset(kgmnal_data.kgm_trans,-1,sizeof(kgmnal_tx_t)*(TXMSGS)); - - spin_lock_init(&kgmnal_data.kgm_dispatch_lock); - spin_lock_init(&kgmnal_data.kgm_update_lock); - spin_lock_init(&kgmnal_data.kgm_send_lock); - - /* Do the receiver and xmtr allocation */ - - rc = gm_init(); - if (rc != GM_SUCCESS) { - CERROR("gm_init failed: %d\n", rc); - return -1; - } - - rc = gm_open(&kgmnal_data.kgm_port, 0 , KGM_PORT_NUM, KGM_HOSTNAME, - GM_API_VERSION_1_1); - if (rc != GM_SUCCESS) { - gm_finalize(); - kgmnal_data.kgm_port = NULL; - CERROR("gm_open failed: %d\n", rc); - return -1; - } - gm_get_node_id(kgmnal_data.kgm_port, &nid); - kgmnal_data.kgm_nid = nid; - /* Allocate 2 different sizes of buffers. For new, use half - the tokens for each. */ - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d large %d byte recv buffers\n", - ntok, MSG_LEN_LARGE); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_LARGE); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_LARGE, MSG_SIZE_LARGE, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_LARGE, GM_LOW_PRIORITY); - } - - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d small %d byte recv buffers\n", - ntok, MSG_LEN_SMALL); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_SMALL); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_SMALL, MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - } - sizemask = (1 << MSG_SIZE_LARGE) | (1 << MSG_SIZE_SMALL); - CDEBUG(D_NET, "gm_set_acceptable_sizes port %p pri %d mask 0x%x\n", - kgmnal_data.kgm_port, GM_LOW_PRIORITY, sizemask); - gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_LOW_PRIORITY, - sizemask); - gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_HIGH_PRIORITY, 0); - - /* Initialize Network Interface */ - rc = PtlNIInit(kgmnal_init, 32, 4, 0, &kgmnal_ni); - if (rc) { - CERROR("PtlNIInit failed %d\n", rc); - return (-ENOMEM); - } - - /* Start receiver thread */ - kernel_thread(kgmnal_recv_thread, &kgmnal_data, 0); - - PORTAL_SYMBOL_REGISTER(kgmnal_ni); - - kgmnal_data.kgm_init = 1; - - return 0; -} - -MODULE_AUTHOR("Robert Read "); -MODULE_DESCRIPTION("Kernel Myrinet GM NAL v0.1"); -MODULE_LICENSE("GPL"); - -module_init (kgmnal_initialize); -module_exit (kgmnal_finalize); - -EXPORT_SYMBOL (kgmnal_ni); diff --git a/lustre/portals/knals/gmnal/gmnal.h b/lustre/portals/knals/gmnal/gmnal.h index 47e8c3c..fdde839 100644 --- a/lustre/portals/knals/gmnal/gmnal.h +++ b/lustre/portals/knals/gmnal/gmnal.h @@ -1,101 +1,455 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is 
free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifndef _GMNAL_H -#define _GMNAL_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include + + +/* + * Portals GM kernel NAL header file + * This file makes all declaration and prototypes + * for the API side and CB side of the NAL + */ +#ifndef __INCLUDE_GMNAL_H__ +#define __INCLUDE_GMNAL_H__ + +#include "linux/config.h" +#include "linux/module.h" +#include "linux/tty.h" +#include "linux/kernel.h" +#include "linux/mm.h" +#include "linux/string.h" +#include "linux/stat.h" +#include "linux/errno.h" +#include "linux/locks.h" +#include "linux/unistd.h" +#include "linux/init.h" +#include "linux/sem.h" +#include "linux/vmalloc.h" +#ifdef MODVERSIONS +#include +#endif #define DEBUG_SUBSYSTEM S_GMNAL -#include -#include -#include +#include "portals/nal.h" +#include "portals/api.h" +#include "portals/errno.h" +#include "linux/kp30.h" +#include "portals/p30.h" + +#include "portals/lib-nal.h" +#include "portals/lib-p30.h" + +#define GM_STRONG_TYPES 1 +#include "gm.h" +#include "gm_internal.h" + + +/* + * Defines for the API NAL + */ + +/* + * Small message size is configurable + * insmod can set small_msg_size + * which is used to populate nal_data.small_msg_size + */ +#define GMNAL_SMALL_MESSAGE 1078 +#define GMNAL_LARGE_MESSAGE_INIT 1079 +#define GMNAL_LARGE_MESSAGE_ACK 1080 +#define GMNAL_LARGE_MESSAGE_FINI 1081 + +extern int gmnal_small_msg_size; +extern int num_rx_threads; +extern int num_stxds; +#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size +#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c) +#define GMNAL_MAGIC 0x1234abcd + + +/* + * Small Transmit Descriptor + * A structre to keep track of a small transmit operation + * This structure has a one-to-one relationship with a small + * transmit buffer (both create by gmnal_stxd_alloc). + * There are two free list of stxd. One for use by clients of the NAL + * and the other by the NAL rxthreads when doing sends. + * This helps prevent deadlock caused by stxd starvation. + */ +typedef struct _gmnal_stxd_t { + void *buffer; + int buffer_size; + gm_size_t gm_size; + int msg_size; + int gm_target_node; + int gm_priority; + int type; + struct _gmnal_data_t *nal_data; + lib_msg_t *cookie; + int niov; + struct iovec iov[PTL_MD_MAX_IOV]; + struct _gmnal_srxd_t *srxd; + struct _gmnal_stxd_t *next; + int rxt; + int kniov; + struct iovec *iovec_dup; +} gmnal_stxd_t; + +/* + * as for gmnal_stxd_t + * a hash table in nal_data find srxds from + * the rx buffer address. 
hash table populated at init time + */ +typedef struct _gmnal_srxd_t { + void *buffer; + int size; + gm_size_t gmsize; + unsigned int gm_source_node; + gmnal_stxd_t *source_stxd; + int type; + int nsiov; + int nriov; + struct iovec *riov; + int ncallbacks; + spinlock_t callback_lock; + int callback_status; + lib_msg_t *cookie; + struct _gmnal_srxd_t *next; + struct _gmnal_data_t *nal_data; +} gmnal_srxd_t; + +/* + * Header which lmgnal puts at the start of each message + */ +typedef struct _gmnal_msghdr { + int magic; + int type; + unsigned int sender_node_id; + gmnal_stxd_t *stxd; + int niov; + } gmnal_msghdr_t; +#define GMNAL_MSGHDR_SIZE sizeof(gmnal_msghdr_t) + +/* + * the caretaker thread (ct_thread) gets receive events + * (and other events) from the myrinet device via the GM2 API. + * caretaker thread populates one work entry for each receive event, + * puts it on a Q in nal_data and wakes a receive thread to + * process the receive. + * Processing a portals receive can involve a transmit operation. + * Because of this the caretaker thread cannot process receives + * as it may get deadlocked when supply of transmit descriptors + * is exhausted (as caretaker thread is responsible for replacing + * transmit descriptors on the free list) + */ +typedef struct _gmnal_rxtwe { + gm_recv_event_t *rx; + struct _gmnal_rxtwe *next; +} gmnal_rxtwe_t; + +/* + * 1 receive thread started on each CPU + */ +#define NRXTHREADS 10 /* max number of receiver threads */ + +typedef struct _gmnal_data_t { + int refcnt; + spinlock_t cb_lock; + spinlock_t stxd_lock; + struct semaphore stxd_token; + gmnal_stxd_t *stxd; + spinlock_t rxt_stxd_lock; + struct semaphore rxt_stxd_token; + gmnal_stxd_t *rxt_stxd; + spinlock_t srxd_lock; + struct semaphore srxd_token; + gmnal_srxd_t *srxd; + struct gm_hash *srxd_hash; + nal_t *nal; + nal_cb_t *nal_cb; + struct gm_port *gm_port; + unsigned int gm_local_nid; + unsigned int gm_global_nid; + spinlock_t gm_lock; + long rxthread_pid[NRXTHREADS]; + int rxthread_stop_flag; + spinlock_t rxthread_flag_lock; + long rxthread_flag; + long ctthread_pid; + int ctthread_flag; + gm_alarm_t ctthread_alarm; + int small_msg_size; + int small_msg_gmsize; + gmnal_rxtwe_t *rxtwe_head; + gmnal_rxtwe_t *rxtwe_tail; + spinlock_t rxtwe_lock; + struct semaphore rxtwe_wait; +} gmnal_data_t; + +/* + * Flags to start/stop and check status of threads + * each rxthread sets 1 bit (any bit) of the flag on startup + * and clears 1 bit when exiting + */ +#define GMNAL_THREAD_RESET 0 +#define GMNAL_THREAD_STOP 666 +#define GMNAL_CTTHREAD_STARTED 333 +#define GMNAL_RXTHREADS_STARTED ( (1< +/* + * FUNCTION PROTOTYPES + */ + +/* + * Locking macros + */ /* - * Myrinet GM NAL + * For the Small tx and rx descriptor lists */ -#define NPAGES_LARGE 16 -#define NPAGES_SMALL 1 -#define MSG_LEN_LARGE NPAGES_LARGE*PAGE_SIZE -#define MSG_LEN_SMALL NPAGES_SMALL*PAGE_SIZE -#define MSG_SIZE_LARGE (gm_min_size_for_length(MSG_LEN_LARGE)) -#define MSG_SIZE_SMALL (gm_min_size_for_length(MSG_LEN_SMALL)) +#define GMNAL_TXD_LOCK_INIT(a) spin_lock_init(&a->stxd_lock); +#define GMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); +#define GMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); +#define GMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); +#define GMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); +#define GMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) +#define GMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); -#define TXMSGS 64 /* Number of Transmit Messages */ -#define ENVELOPES 8 /* Number of outstanding receive msgs */ +#define 
GMNAL_RXT_TXD_LOCK_INIT(a) spin_lock_init(&a->rxt_stxd_lock); +#define GMNAL_RXT_TXD_LOCK(a) spin_lock(&a->rxt_stxd_lock); +#define GMNAL_RXT_TXD_UNLOCK(a) spin_unlock(&a->rxt_stxd_lock); +#define GMNAL_RXT_TXD_TOKEN_INIT(a, n) sema_init(&a->rxt_stxd_token, n); +#define GMNAL_RXT_TXD_GETTOKEN(a) down(&a->rxt_stxd_token); +#define GMNAL_RXT_TXD_TRYGETTOKEN(a) down_trylock(&a->rxt_stxd_token) +#define GMNAL_RXT_TXD_RETURNTOKEN(a) up(&a->rxt_stxd_token); -#define KGM_PORT_NUM 3 -#define KGM_HOSTNAME "kgmnal" +#define GMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); +#define GMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); +#define GMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); +#define GMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); +#define GMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); +#define GMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) +#define GMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); +#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); +#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock); +#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock); +#define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); -typedef struct { - char *krx_buffer; - unsigned long krx_len; - unsigned int krx_size; - unsigned int krx_priority; - struct list_head krx_item; -} kgmnal_rx_t; +/* + * Memory Allocator + */ + +/* + * API NAL + */ +int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); + +int gmnal_api_shutdown(nal_t *, int); + +int gmnal_api_validate(nal_t *, void *, size_t); + +void gmnal_api_yield(nal_t *); + +void gmnal_api_lock(nal_t *, unsigned long *); + +void gmnal_api_unlock(nal_t *, unsigned long *); + + +#define GMNAL_INIT_NAL(a) do { \ + a->forward = gmnal_api_forward; \ + a->shutdown = gmnal_api_shutdown; \ + a->validate = NULL; \ + a->yield = gmnal_api_yield; \ + a->lock = gmnal_api_lock; \ + a->unlock = gmnal_api_unlock; \ + a->timeout = NULL; \ + a->refct = 1; \ + a->nal_data = NULL; \ + } while (0) + + +/* + * CB NAL + */ + +int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); + +int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); + +int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, + unsigned int, struct iovec *, size_t, size_t); + +int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, + unsigned int, ptl_kiov_t *, size_t, size_t); + +int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); + +int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); + +int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); + +void *gmnal_cb_malloc(nal_cb_t *, size_t); + +void gmnal_cb_free(nal_cb_t *, void *, size_t); + +void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); + +int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); + +void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...); + +void gmnal_cb_cli(nal_cb_t *, unsigned long *); + +void gmnal_cb_sti(nal_cb_t *, unsigned long *); + +int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); + +nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); + +void gmnal_fini(void); + + + +#define GMNAL_INIT_NAL_CB(a) do { \ + a->cb_send = gmnal_cb_send; \ + a->cb_send_pages = gmnal_cb_send_pages; \ + a->cb_recv = gmnal_cb_recv; \ + a->cb_recv_pages = gmnal_cb_recv_pages; \ + a->cb_read = gmnal_cb_read; \ + a->cb_write = gmnal_cb_write; \ + a->cb_callback = 
gmnal_cb_callback; \ + a->cb_malloc = gmnal_cb_malloc; \ + a->cb_free = gmnal_cb_free; \ + a->cb_map = NULL; \ + a->cb_unmap = NULL; \ + a->cb_printf = gmnal_cb_printf; \ + a->cb_cli = gmnal_cb_cli; \ + a->cb_sti = gmnal_cb_sti; \ + a->cb_dist = gmnal_cb_dist; \ + a->nal_data = NULL; \ + } while (0) + + +/* + * Small Transmit and Receive Descriptor Functions + */ +int gmnal_alloc_stxd(gmnal_data_t *); +void gmnal_free_stxd(gmnal_data_t *); +gmnal_stxd_t* gmnal_get_stxd(gmnal_data_t *, int); +void gmnal_return_stxd(gmnal_data_t *, gmnal_stxd_t *); + +int gmnal_alloc_srxd(gmnal_data_t *); +void gmnal_free_srxd(gmnal_data_t *); +gmnal_srxd_t* gmnal_get_srxd(gmnal_data_t *, int); +void gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *); + +/* + * general utility functions + */ +gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*); +void gmnal_stop_rxthread(gmnal_data_t *); +void gmnal_stop_ctthread(gmnal_data_t *); +void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); +void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t); +char *gmnal_gm_error(gm_status_t); +char *gmnal_rxevent(gm_recv_event_t*); +int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int); +void gmnal_yield(int); +int gmnal_start_kernel_threads(gmnal_data_t *); + + +/* + * Communication functions + */ + +/* + * Receive threads + */ +int gmnal_ct_thread(void *); /* caretaker thread */ +int gmnal_rx_thread(void *); /* receive thread */ +int gmnal_pre_receive(gmnal_data_t*, gm_recv_t*, int); +int gmnal_rx_bad(gmnal_data_t *, gm_recv_t *, gmnal_srxd_t *); +int gmnal_rx_requeue_buffer(gmnal_data_t *, gmnal_srxd_t *); +int gmnal_add_rxtwe(gmnal_data_t *, gm_recv_event_t *); +gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_data_t *); +void gmnal_remove_rxtwe(gmnal_data_t *); + + +/* + * Small messages + */ +int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, + struct iovec *, size_t, size_t); +int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, + unsigned int, struct iovec*, int); +void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t); + + + +/* + * Large messages + */ +int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int, + struct iovec *, size_t, size_t); -typedef struct { - nal_cb_t *ktx_nal; - void *ktx_private; - lib_msg_t *ktx_cookie; - char *ktx_buffer; - size_t ktx_len; - unsigned long ktx_size; - int ktx_ndx; - unsigned int ktx_priority; - unsigned int ktx_tgt_node; - unsigned int ktx_tgt_port_id; -} kgmnal_tx_t; +int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, + struct iovec*, int); +void gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t); -typedef struct { - char kgm_init; - char kgm_shuttingdown; - struct gm_port *kgm_port; - struct list_head kgm_list; - ptl_nid_t kgm_nid; - nal_cb_t *kgm_cb; - struct kgm_trans *kgm_trans; - struct tq_struct kgm_ready_tq; - spinlock_t kgm_dispatch_lock; - spinlock_t kgm_update_lock; - spinlock_t kgm_send_lock; -} kgmnal_data_t; +int gmnal_remote_get(gmnal_srxd_t *, int, struct iovec*, int, + struct iovec*); -int kgm_init(kgmnal_data_t *kgm_data); -int kgmnal_recv_thread(void *); -int gm_return_mynid(void); -void kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); +void gmnal_remote_get_callback(gm_port_t *, void *, gm_status_t); -extern kgmnal_data_t kgmnal_data; -extern nal_t kgmnal_api; -extern nal_cb_t kgmnal_lib; +int gmnal_copyiov(int, gmnal_srxd_t *, int, struct iovec*, int, + struct iovec*); -#endif /* _GMNAL_H */ 
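+/*
+ * Usage sketch only (not a declaration): the descriptor pools are consumed
+ * through gmnal_get_stxd()/gmnal_return_stxd() above, which appear to gate
+ * callers on the stxd_token semaphore (see the GMNAL_TXD_*TOKEN macros).
+ * A small send therefore looks roughly like:
+ *
+ *     gmnal_stxd_t *stxd = gmnal_get_stxd(nal_data, 1);
+ *     ...build the message in stxd->buffer...
+ *     gm_send_to_peer_with_callback(..., gmnal_small_tx_callback, stxd);
+ *     ...the send callback gives the descriptor back with
+ *        gmnal_return_stxd(nal_data, stxd)...
+ */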
+void gmnal_large_tx_ack(gmnal_data_t *, gmnal_srxd_t *); +void gmnal_large_tx_ack_callback(gm_port_t *, void *, gm_status_t); +void gmnal_large_tx_ack_received(gmnal_data_t *, gmnal_srxd_t *); +#endif /*__INCLUDE_GMNAL_H__*/ diff --git a/lustre/portals/knals/gmnal/gmnal_api.c b/lustre/portals/knals/gmnal/gmnal_api.c new file mode 100644 index 0000000..40d23db --- /dev/null +++ b/lustre/portals/knals/gmnal/gmnal_api.c @@ -0,0 +1,474 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * Implements the API NAL functions + */ + +#include "gmnal.h" + +gmnal_data_t *global_nal_data = NULL; +/* + * gmnal_api_forward + * This function takes a pack block of arguments from the NAL API + * module and passes them to the NAL CB module. The CB module unpacks + * the args and calls the appropriate function indicated by index. + * Typically this function is used to pass args between kernel and use + * space. + * As lgmanl exists entirely in kernel, just pass the arg block directly + * to the NAL CB, buy passing the args to lib_dispatch + * Arguments are + * nal_t nal Our nal + * int index the api function that initiated this call + * void *args packed block of function args + * size_t arg_len length of args block + * void *ret A return value for the API NAL + * size_t ret_len Size of the return value + * + */ + +int +gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, + void *ret, size_t ret_len) +{ + + nal_cb_t *nal_cb = NULL; + gmnal_data_t *nal_data = NULL; + + + + + + if (!nal || !args || (index < 0) || (arg_len < 0)) { + CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n"); + return (PTL_FAIL); + } + + if (ret && (ret_len <= 0)) { + CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n"); + return (PTL_FAIL); + } + + + if (!nal->nal_data) { + CDEBUG(D_ERROR, "bad nal, no nal data\n"); + return (PTL_FAIL); + } + + nal_data = nal->nal_data; + CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data); + + if (!nal_data->nal_cb) { + CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n"); + return (PTL_FAIL); + } + + nal_cb = nal_data->nal_cb; + CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb); + + CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n"); + lib_dispatch(nal_cb, NULL, index, args, ret); + CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n"); + + return(PTL_OK); +} + + +/* + * gmnal_api_shutdown + * Close down this interface and free any resources associated with it + * nal_t nal our nal to shutdown + */ +int +gmnal_api_shutdown(nal_t *nal, int interface) +{ + + gmnal_data_t *nal_data = nal->nal_data; + + CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data); + + return(PTL_OK); +} + + +/* + * gmnal_api_validate + * validate a user address for use in communications + 
* There's nothing to be done here + */ +int +gmnal_api_validate(nal_t *nal, void *base, size_t extent) +{ + + return(PTL_OK); +} + + + +/* + * gmnal_api_yield + * Give up the processor + */ +void +gmnal_api_yield(nal_t *nal) +{ + CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal); + + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + + return; +} + + + +/* + * gmnal_api_lock + * Take a threadsafe lock + */ +void +gmnal_api_lock(nal_t *nal, unsigned long *flags) +{ + + gmnal_data_t *nal_data; + nal_cb_t *nal_cb; + + nal_data = nal->nal_data; + nal_cb = nal_data->nal_cb; + + nal_cb->cb_cli(nal_cb, flags); + + return; +} + +/* + * gmnal_api_unlock + * Release a threadsafe lock + */ +void +gmnal_api_unlock(nal_t *nal, unsigned long *flags) +{ + gmnal_data_t *nal_data; + nal_cb_t *nal_cb; + + nal_data = nal->nal_data; + nal_cb = nal_data->nal_cb; + + nal_cb->cb_sti(nal_cb, flags); + + return; +} + + +nal_t * +gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, + ptl_pid_t rpid) +{ + + nal_t *nal = NULL; + nal_cb_t *nal_cb = NULL; + gmnal_data_t *nal_data = NULL; + gmnal_srxd_t *srxd = NULL; + gm_status_t gm_status; + unsigned int local_nid = 0, global_nid = 0; + ptl_nid_t portals_nid; + ptl_pid_t portals_pid = 0; + + + CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], + ac_size[%d]\n", interface, ptl_size, ac_size); + + + PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t)); + if (!nal_data) { + CDEBUG(D_ERROR, "can't get memory\n"); + return(NULL); + } + memset(nal_data, 0, sizeof(gmnal_data_t)); + /* + * set the small message buffer size + */ + nal_data->refcnt = 1; + + CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data); + CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size); + + PORTAL_ALLOC(nal, sizeof(nal_t)); + if (!nal) { + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + return(NULL); + } + memset(nal, 0, sizeof(nal_t)); + CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal); + + PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); + if (!nal_cb) { + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + return(NULL); + } + memset(nal_cb, 0, sizeof(nal_cb_t)); + CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb); + + GMNAL_INIT_NAL(nal); + GMNAL_INIT_NAL_CB(nal_cb); + /* + * String them all together + */ + nal->nal_data = (void*)nal_data; + nal_cb->nal_data = (void*)nal_data; + nal_data->nal = nal; + nal_data->nal_cb = nal_cb; + + GMNAL_CB_LOCK_INIT(nal_data); + GMNAL_GM_LOCK_INIT(nal_data); + + + /* + * initialise the interface, + */ + CDEBUG(D_INFO, "Calling gm_init\n"); + if (gm_init() != GM_SUCCESS) { + CDEBUG(D_ERROR, "call to gm_init failed\n"); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], + name [%s], version [%d]\n", interface, GMNAL_GM_PORT, + "gmnal", GM_API_VERSION); + + GMNAL_GM_LOCK(nal_data); + gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal", + GM_API_VERSION); + GMNAL_GM_UNLOCK(nal_data); + + CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status); + if (gm_status == GM_SUCCESS) { + CDEBUG(D_INFO, "gm_open succeeded port[%p]\n", + nal_data->gm_port); + } else { + switch(gm_status) { + case(GM_INVALID_PARAMETER): + CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n"); + break; + case(GM_BUSY): + CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n"); + break; + case(GM_NO_SUCH_DEVICE): + CDEBUG(D_ERROR, "gm_open Failure. 
No such device\n"); + break; + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib + and driver\n"); + break; + case(GM_OUT_OF_MEMORY): + CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n"); + break; + default: + CDEBUG(D_ERROR, "gm_open Failure. Unknow error + code [%d]\n", gm_status); + break; + } + GMNAL_GM_LOCK(nal_data); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + nal_data->small_msg_size = gmnal_small_msg_size; + nal_data->small_msg_gmsize = + gm_min_size_for_length(gmnal_small_msg_size); + + if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) { + CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n"); + gmnal_free_stxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + /* + * Hang out a bunch of small receive buffers + * In fact hang them all out + */ + while((srxd = gmnal_get_srxd(nal_data, 0))) { + CDEBUG(D_NET, "giving [%p] to gm_provide_recvive_buffer\n", + srxd->buffer); + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, + srxd->buffer, srxd->gmsize, + GM_LOW_PRIORITY, 0); + GMNAL_GM_UNLOCK(nal_data); + } + + /* + * Allocate pools of small tx buffers and descriptors + */ + if (gmnal_alloc_stxd(nal_data) != GMNAL_STATUS_OK) { + CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n"); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + gmnal_start_kernel_threads(nal_data); + + while (nal_data->rxthread_flag != GMNAL_RXTHREADS_STARTED) { + gmnal_yield(1); + CDEBUG(D_INFO, "Waiting for receive thread signs of life\n"); + } + + CDEBUG(D_INFO, "receive thread seems to have started\n"); + + + /* + * Initialise the portals library + */ + CDEBUG(D_NET, "Getting node id\n"); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + CDEBUG(D_ERROR, "can't determine node id\n"); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + nal_data->gm_local_nid = local_nid; + CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, + &global_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "failed to obtain global id\n"); + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + CDEBUG(D_INFO, "Global node id is [%u]\n", 
global_nid); + nal_data->gm_global_nid = global_nid; + +/* + pid = gm_getpid(); +*/ + CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid); + portals_nid = (unsigned long)global_nid; + CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid); + + CDEBUG(D_PORTALS, "calling lib_init\n"); + if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, + ac_size) != PTL_OK) { + CDEBUG(D_ERROR, "lib_init failed\n"); + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + + } + + CDEBUG(D_INFO, "gmnal_init finished\n"); + global_nal_data = nal->nal_data; + return(nal); +} + + + +/* + * Called when module removed + */ +void gmnal_fini() +{ + gmnal_data_t *nal_data = global_nal_data; + nal_t *nal = nal_data->nal; + nal_cb_t *nal_cb = nal_data->nal_cb; + + CDEBUG(D_TRACE, "gmnal_fini\n"); + + PtlNIFini(kgmnal_ni); + lib_fini(nal_cb); + + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + gmnal_free_stxd(nal_data); + gmnal_free_srxd(nal_data); + GMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(gmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); +} + +EXPORT_SYMBOL(gmnal_init); +EXPORT_SYMBOL(gmnal_fini); +EXPORT_SYMBOL(gmnal_api_forward); +EXPORT_SYMBOL(gmnal_api_validate); +EXPORT_SYMBOL(gmnal_api_yield); +EXPORT_SYMBOL(gmnal_api_lock); +EXPORT_SYMBOL(gmnal_api_unlock); +EXPORT_SYMBOL(gmnal_api_shutdown); diff --git a/lustre/portals/knals/gmnal/gmnal_cb.c b/lustre/portals/knals/gmnal/gmnal_cb.c index 4728eca..093ee64 100644 --- a/lustre/portals/knals/gmnal/gmnal_cb.c +++ b/lustre/portals/knals/gmnal/gmnal_cb.c @@ -1,517 +1,290 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Based on ksocknal and qswnal + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read + * This file is part of Lustre, http://www.lustre.org/ * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or + * Lustre is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * - * Portals is distributed in the hope that it will be useful, + * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software + * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -/* TODO - * preallocate send buffers, store on list - * put receive buffers on queue, handle with receive threads - * use routing - */ - -#include "gmnal.h" - -extern kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *,int); - -static kgmnal_tx_t * -get_trans(void) -{ - kgmnal_tx_t *t; - PORTAL_ALLOC(t, (sizeof(kgmnal_tx_t))); - return t; -} - -static void -put_trans(kgmnal_tx_t *t) -{ - PORTAL_FREE(t, sizeof(kgmnal_tx_t)); -} - -int -kgmnal_ispeer (ptl_nid_t nid) -{ - unsigned int gmnid = (unsigned int)nid; - unsigned int nnids; - - gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - return ((ptl_nid_t)gmnid == nid &&/* didn't lose high bits on conversion ? */ - gmnid < nnids); /* it's in this machine */ -} /* - * LIB functions follow - * + * This file implements the nal cb functions */ -static int -kgmnal_read (nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - -static int -kgmnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} -static void * -kgmnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - PORTAL_ALLOC(buf, len); - return buf; -} +#include "gmnal.h" -static void -kgmnal_free(nal_cb_t *nal, void *buf, size_t len) +int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int niov, struct iovec *iov, size_t mlen, + size_t rlen) { - PORTAL_FREE(buf, len); + gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; + int status = PTL_OK; + + + CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], + niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + nal_cb, private, cookie, niov, iov, mlen, rlen); + + switch(srxd->type) { + case(GMNAL_SMALL_MESSAGE): + CDEBUG(D_INFO, "gmnal_cb_recv got small message\n"); + status = gmnal_small_rx(nal_cb, private, cookie, niov, + iov, mlen, rlen); + break; + case(GMNAL_LARGE_MESSAGE_INIT): + CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n"); + status = gmnal_large_rx(nal_cb, private, cookie, niov, + iov, mlen, rlen); + } + + + CDEBUG(D_INFO, "gmnal_cb_recv gmnal_return status [%d]\n", status); + return(status); } -static void -kgmnal_printf(nal_cb_t *nal, const char *fmt, ...) 
+int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, + size_t rlen) { - va_list ap; - char msg[256]; - - if (portal_debug & D_NET) { - va_start( ap, fmt ); - vsnprintf( msg, sizeof(msg), fmt, ap ); - va_end( ap ); - - printk("Lustre: CPUId: %d %s",smp_processor_id(), msg); - } + gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; + int status = PTL_OK; + struct iovec *iovec = NULL, *iovec_dup = NULL; + int i = 0; + + + CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], + cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n", + nal_cb, private, cookie, kniov, kiov, mlen, rlen); + + if (srxd->type == GMNAL_SMALL_MESSAGE) { + PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov); + if (!iovec) { + CDEBUG(D_ERROR, "Can't malloc\n"); + return(GMNAL_STATUS_FAIL); + } + iovec_dup = iovec; + + /* + * map each page and create an iovec for it + */ + for (i=0; ikiov_page, kiov->kiov_len, + kiov->kiov_offset); + iovec->iov_len = kiov->kiov_len; + CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page); + + iovec->iov_base = kmap(kiov->kiov_page) + + kiov->kiov_offset; + + CDEBUG(D_INFO, "iov_base is [%p]\n", iovec->iov_base); + iovec++; + kiov++; + } + CDEBUG(D_INFO, "calling gmnal_small_rx\n"); + status = gmnal_small_rx(nal_cb, private, cookie, kniov, + iovec_dup, mlen, rlen); + PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov); + } + + + CDEBUG(D_INFO, "gmnal_return status [%d]\n", status); + return(status); } -static void -kgmnal_cli(nal_cb_t *nal, unsigned long *flags) +int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, size_t len) { - kgmnal_data_t *data= nal->nal_data; - spin_lock_irqsave(&data->kgm_dispatch_lock,*flags); + gmnal_data_t *nal_data; + + + CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n", + niov, len, nid); + nal_data = nal_cb->nal_data; + + if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { + CDEBUG(D_INFO, "This is a small message send\n"); + gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid, + niov, iov, len); + } else { + CDEBUG(D_ERROR, "Large message send it is not supported\n"); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid, + niov, iov, len); + } + return(PTL_OK); } - -static void -kgmnal_sti(nal_cb_t *nal, unsigned long *flags) +int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len) { - kgmnal_data_t *data= nal->nal_data; - spin_unlock_irqrestore(&data->kgm_dispatch_lock,*flags); + int i = 0; + gmnal_data_t *nal_data; + struct iovec *iovec = NULL, *iovec_dup = NULL; + + CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len); + nal_data = nal_cb->nal_data; + PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec)); + iovec_dup = iovec; + if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) { + CDEBUG(D_INFO, "This is a small message send\n"); + + for (i=0; ikiov_page, kiov->kiov_len, + kiov->kiov_offset); + + iovec->iov_base = kmap(kiov->kiov_page) + + kiov->kiov_offset; + + iovec->iov_len = kiov->kiov_len; + iovec++; + kiov++; + } + gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, + pid, kniov, iovec_dup, len); + } else { + CDEBUG(D_ERROR, "Large message send it is not supported yet\n"); + return(PTL_FAIL); 
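+                /*
+                 * Note: with the return(PTL_FAIL) above in place, the
+                 * large-message branch below is not reached; presumably it
+                 * becomes the active path once large kiov sends are enabled.
+                 */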
+ for (i=0; ikiov_page, kiov->kiov_len, + kiov->kiov_offset); + + iovec->iov_base = kmap(kiov->kiov_page) + + kiov->kiov_offset; + iovec->iov_len = kiov->kiov_len; + iovec++; + kiov++; + } + gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, + pid, kniov, iovec, len); + } + PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec)); + return(PTL_OK); } - -static int -kgmnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) +int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, + user_ptr src, size_t len) { - /* network distance doesn't mean much for this nal */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; + gm_bcopy(src, dst, len); + return(PTL_OK); } -/* FIXME rmr: add rounting code here */ -static void -kgmnal_tx_done(kgmnal_tx_t *trans, int error) -{ - lib_finalize(trans->ktx_nal, trans->ktx_private, trans->ktx_cookie); - - gm_dma_free(kgmnal_data.kgm_port, trans->ktx_buffer); - - trans->ktx_buffer = NULL; - trans->ktx_len = 0; - - put_trans(trans); -} -static char * gm_error_strings[GM_NUM_STATUS_CODES] = { - [GM_SUCCESS] = "GM_SUCCESS", - [GM_SEND_TIMED_OUT] = "GM_SEND_TIMED_OUT", - [GM_SEND_REJECTED] = "GM_SEND_REJECTED", - [GM_SEND_TARGET_PORT_CLOSED] = "GM_SEND_TARGET_PORT_CLOSED", - [GM_SEND_TARGET_NODE_UNREACHABLE] = "GM_SEND_TARGET_NODE_UNREACHABLE", - [GM_SEND_DROPPED] = "GM_SEND_DROPPED", - [GM_SEND_PORT_CLOSED] = "GM_SEND_PORT_CLOSED", -}; - -inline char * get_error(int status) +int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, + void *src, size_t len) { - if (gm_error_strings[status] != NULL) - return gm_error_strings[status]; - else - return "Unknown error"; + gm_bcopy(src, dst, len); + return(PTL_OK); } -static void -kgmnal_errhandler(struct gm_port *p, void *context, gm_status_t status) +int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, + ptl_event_t *ev) { - CDEBUG(D_NET,"error callback: ktx %p status %d\n", context, status); -} -static void -kgmnal_txhandler(struct gm_port *p, void *context, gm_status_t status) -{ - kgmnal_tx_t *ktx = (kgmnal_tx_t *)context; - int err = 0; - - LASSERT (p != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET,"ktx %p status %d nid 0x%x pid %d\n", ktx, status, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id); - - switch((int)status) { - case GM_SUCCESS: /* normal */ - break; - case GM_SEND_TIMED_OUT: /* application error */ - case GM_SEND_REJECTED: /* size of msg unacceptable */ - case GM_SEND_TARGET_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_resume_sending(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_TARGET_NODE_UNREACHABLE: - case GM_SEND_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_drop_sends(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_DROPPED: - CERROR("%s (%d):\n", get_error(status), status); - err = -EIO; - break; - default: - CERROR("Unknown status: %d\n", status); - err = -EIO; - break; - } - - kgmnal_tx_done(ktx, err); + if (eq->event_callback != NULL) { + CDEBUG(D_INFO, "found callback\n"); + eq->event_callback(ev); + } + + return(PTL_OK); } -/* - */ - -static int -kgmnal_send(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t len) +void *gmnal_cb_malloc(nal_cb_t 
*nal_cb, size_t len) { - /* - * ipnal assumes that this is the private as passed to lib_dispatch.. - * so do we :/ - */ - kgmnal_tx_t *ktx=NULL; - int rc=0; - void * buf; - int buf_len = sizeof(ptl_hdr_t) + len; - int buf_size = 0; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - PROF_START(gmnal_send); - - - CDEBUG(D_NET, "sending %d bytes from %p to nid: 0x%Lx pid %d\n", - len, iov, nid, KGM_PORT_NUM); - - /* ensure there is an available tx handle */ - - /* save transaction info to trans for later finalize and cleanup */ - ktx = get_trans(); - if (ktx == NULL) { - rc = -ENOMEM; - goto send_exit; - } - - /* hmmm... GM doesn't support vectored write, so need to allocate buffer to coalesce - header and data. - Also, memory must be dma'able or registered with GM. */ - - if (buf_len <= MSG_LEN_SMALL) { - buf_size = MSG_SIZE_SMALL; - } else if (buf_len <= MSG_LEN_LARGE) { - buf_size = MSG_SIZE_LARGE; - } else { - printk("LustreError: kgmnal:request exceeds TX MTU size (%d).\n", - MSG_SIZE_LARGE); - rc = -1; - goto send_exit; - } - - buf = gm_dma_malloc(kgmnal_data.kgm_port, buf_len); - if (buf == NULL) { - rc = -ENOMEM; - goto send_exit; - } - memcpy(buf, hdr, sizeof(ptl_hdr_t)); - - if (len != 0) - lib_copy_iov2buf(((char *)buf) + sizeof (ptl_hdr_t), - options, niov, iov, len); - - ktx->ktx_nal = nal; - ktx->ktx_private = private; - ktx->ktx_cookie = cookie; - ktx->ktx_len = buf_len; - ktx->ktx_size = buf_size; - ktx->ktx_buffer = buf; - ktx->ktx_priority = GM_LOW_PRIORITY; - ktx->ktx_tgt_node = nid; - ktx->ktx_tgt_port_id = KGM_PORT_NUM; - - CDEBUG(D_NET, "gm_send %d bytes (size %d) from %p to nid: 0x%Lx " - "pid %d pri %d\n", buf_len, buf_size, iov, nid, KGM_PORT_NUM, - GM_LOW_PRIORITY); - - gm_send_with_callback(kgmnal_data.kgm_port, buf, buf_size, - buf_len, GM_LOW_PRIORITY, - nid, KGM_PORT_NUM, - kgmnal_txhandler, ktx); - - PROF_FINISH(gmnal_send); - send_exit: - return rc; -} -void -kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - CERROR ("forwarding not implemented\n"); + void *ptr = NULL; + CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len); + PORTAL_ALLOC(ptr, len); + return(ptr); } -void -kqswnal_fwd_callback (void *arg, int error) +void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len) { - CERROR ("forwarding not implemented\n"); + CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len); + PORTAL_FREE(buf, len); + return; } - -static inline void -kgmnal_requeue_rx(kgmnal_rx_t *krx) +void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, + void **addrkey) { - gm_provide_receive_buffer(kgmnal_data.kgm_port, krx->krx_buffer, - krx->krx_size, krx->krx_priority); + return; } -/* Process a received portals packet */ - -/* Receive Interrupt Handler */ -static void kgmnal_rx(kgmnal_data_t *kgm, unsigned long len, unsigned int size, - void * buf, unsigned int pri) +int gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, + void**addrkey) { - ptl_hdr_t *hdr = buf; - kgmnal_rx_t krx; - - CDEBUG(D_NET,"buf %p, len %ld\n", buf, len); - - if ( len < sizeof( ptl_hdr_t ) ) { - /* XXX what's this for? 
*/ - if (kgm->kgm_shuttingdown) - return; - CERROR("kgmnal: did not receive complete portal header, " - "len= %ld", len); - gm_provide_receive_buffer(kgm->kgm_port, buf, size, pri); - return; - } - - /* might want to use seperate threads to handle receive */ - krx.krx_buffer = buf; - krx.krx_len = len; - krx.krx_size = size; - krx.krx_priority = pri; - - if ( hdr->dest_nid == kgmnal_lib.ni.nid ) { - PROF_START(lib_parse); - lib_parse(&kgmnal_lib, (ptl_hdr_t *)krx.krx_buffer, &krx); - PROF_FINISH(lib_parse); - } else if (kgmnal_ispeer(hdr->dest_nid)) { - /* should have gone direct to peer */ - CERROR("dropping packet from 0x%llx to 0x%llx: target is " - "a peer", hdr->src_nid, hdr->dest_nid); - kgmnal_requeue_rx(&krx); - } else { - /* forward to gateway */ - CERROR("forwarding not implemented yet"); - kgmnal_requeue_rx(&krx); - } - - return; + return(PTL_OK); } - -static int kgmnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t mlen, - size_t rlen) +void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...) { - kgmnal_rx_t *krx = private; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - CDEBUG(D_NET,"mlen=%d, rlen=%d\n", mlen, rlen); - - /* What was actually received must be >= what sender claims to - * have sent. This is an LASSERT, since lib-move doesn't - * check cb return code yet. */ - LASSERT (krx->krx_len >= sizeof (ptl_hdr_t) + rlen); - LASSERT (mlen <= rlen); - - PROF_START(gmnal_recv); - - if(mlen != 0) { - PROF_START(memcpy); - lib_copy_buf2iov (options, niov, iov, - krx->krx_buffer + sizeof (ptl_hdr_t), mlen); - PROF_FINISH(memcpy); - } - - PROF_START(lib_finalize); - lib_finalize(nal, private, cookie); - PROF_FINISH(lib_finalize); - - kgmnal_requeue_rx(krx); - - PROF_FINISH(gmnal_recv); - - return rlen; + CDEBUG(D_TRACE, "gmnal_cb_printf\n"); + printk(fmt); + return; } - -static void kgmnal_shutdown(void * none) +void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) { - CERROR("called\n"); - return; + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; + + spin_lock_irqsave(&nal_data->cb_lock, *flags); + return; } -/* - * Set terminate and use alarm to wake up the recv thread. - */ -static void recv_shutdown(kgmnal_data_t *kgm) +void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) { - gm_alarm_t alarm; + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; - kgm->kgm_shuttingdown = 1; - gm_initialize_alarm(&alarm); - gm_set_alarm(kgm->kgm_port, &alarm, 1, kgmnal_shutdown, NULL); + spin_unlock_irqrestore(&nal_data->cb_lock, *flags); + return; } -int kgmnal_end(kgmnal_data_t *kgm) +int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) { + CDEBUG(D_TRACE, "gmnal_cb_dist\n"); + if (dist) + *dist = 27; + return(PTL_OK); +} - /* wait for sends to finish ? 
*/ - /* remove receive buffers */ - /* shutdown receive thread */ - recv_shutdown(kgm); - return 0; -} - -/* Used only for the spinner */ -int kgmnal_recv_thread(void *arg) -{ - kgmnal_data_t *kgm = arg; - - LASSERT(kgm != NULL); - - kportal_daemonize("kgmnal_rx"); - - while(1) { - gm_recv_event_t *e; - int priority = GM_LOW_PRIORITY; - if (kgm->kgm_shuttingdown) - break; - - e = gm_blocking_receive_no_spin(kgm->kgm_port); - if (e == NULL) { - CERROR("gm_blocking_receive returned NULL\n"); - break; - } - - switch(gm_ntohc(e->recv.type)) { - case GM_HIGH_RECV_EVENT: - priority = GM_HIGH_PRIORITY; - /* fall through */ - case GM_RECV_EVENT: - kgmnal_rx(kgm, gm_ntohl(e->recv.length), - gm_ntohc(e->recv.size), - gm_ntohp(e->recv.buffer), priority); - break; - case GM_ALARM_EVENT: - CERROR("received alarm"); - gm_unknown(kgm->kgm_port, e); - break; - case GM_BAD_SEND_DETECTED_EVENT: /* ?? */ - CERROR("received bad send!\n"); - break; - default: - gm_unknown(kgm->kgm_port, e); - } - } - - CERROR("shuttting down.\n"); - return 0; -} -nal_cb_t kgmnal_lib = { - nal_data: &kgmnal_data, /* NAL private data */ - cb_send: kgmnal_send, - cb_recv: kgmnal_recv, - cb_read: kgmnal_read, - cb_write: kgmnal_write, - cb_malloc: kgmnal_malloc, - cb_free: kgmnal_free, - cb_printf: kgmnal_printf, - cb_cli: kgmnal_cli, - cb_sti: kgmnal_sti, - cb_dist: kgmnal_dist -}; +EXPORT_SYMBOL(gmnal_cb_send); +EXPORT_SYMBOL(gmnal_cb_send_pages); +EXPORT_SYMBOL(gmnal_cb_recv); +EXPORT_SYMBOL(gmnal_cb_recv_pages); +EXPORT_SYMBOL(gmnal_cb_read); +EXPORT_SYMBOL(gmnal_cb_write); +EXPORT_SYMBOL(gmnal_cb_cli); +EXPORT_SYMBOL(gmnal_cb_sti); +EXPORT_SYMBOL(gmnal_cb_dist); +EXPORT_SYMBOL(gmnal_cb_printf); +EXPORT_SYMBOL(gmnal_cb_map); +EXPORT_SYMBOL(gmnal_cb_unmap); +EXPORT_SYMBOL(gmnal_cb_callback); +EXPORT_SYMBOL(gmnal_cb_free); +EXPORT_SYMBOL(gmnal_cb_malloc); diff --git a/lustre/portals/knals/gmnal/gmnal_comm.c b/lustre/portals/knals/gmnal/gmnal_comm.c new file mode 100644 index 0000000..9e32145 --- /dev/null +++ b/lustre/portals/knals/gmnal/gmnal_comm.c @@ -0,0 +1,1316 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * This file contains all gmnal send and receive functions + */ + +#include "gmnal.h" + +/* + * The caretaker thread + * This is main thread of execution for the NAL side + * This guy waits in gm_blocking_recvive and gets + * woken up when the myrinet adaptor gets an interrupt. + * Hands off receive operations to the receive thread + * This thread Looks after gm_callbacks etc inline. + */ +int +gmnal_ct_thread(void *arg) +{ + gmnal_data_t *nal_data; + gm_recv_event_t *rxevent = NULL; + + if (!arg) { + CDEBUG(D_TRACE, "NO nal_data. 
Exiting\n"); + return(-1); + } + + nal_data = (gmnal_data_t*)arg; + CDEBUG(D_TRACE, "nal_data is [%p]\n", arg); + + daemonize(); + + nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED; + + GMNAL_GM_LOCK(nal_data); + while(nal_data->ctthread_flag == GMNAL_CTTHREAD_STARTED) { + CDEBUG(D_NET, "waiting\n"); + rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); + CDEBUG(D_INFO, "got [%s]\n", gmnal_rxevent(rxevent)); + if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) { + CDEBUG(D_INFO, "time to exit\n"); + break; + } + switch (GM_RECV_EVENT_TYPE(rxevent)) { + + case(GM_RECV_EVENT): + CDEBUG(D_NET, "CTTHREAD:: GM_RECV_EVENT\n"); + GMNAL_GM_UNLOCK(nal_data); + gmnal_add_rxtwe(nal_data, rxevent); + GMNAL_GM_LOCK(nal_data); + CDEBUG(D_NET, "CTTHREAD:: Added event to Q\n"); + break; + case(_GM_SLEEP_EVENT): + /* + * Blocking receive above just returns + * immediatly with _GM_SLEEP_EVENT + * Don't know what this is + */ + CDEBUG(D_NET, "Sleeping in gm_unknown\n"); + GMNAL_GM_UNLOCK(nal_data); + gm_unknown(nal_data->gm_port, rxevent); + GMNAL_GM_LOCK(nal_data); + CDEBUG(D_INFO, "Awake from gm_unknown\n"); + break; + + default: + /* + * Don't know what this is + * gm_unknown will make sense of it + * Should be able to do something with + * FAST_RECV_EVENTS here. + */ + CDEBUG(D_NET, "Passing event to gm_unknown\n"); + GMNAL_GM_UNLOCK(nal_data); + gm_unknown(nal_data->gm_port, rxevent); + GMNAL_GM_LOCK(nal_data); + CDEBUG(D_INFO, "Processed unknown event\n"); + } + } + GMNAL_GM_UNLOCK(nal_data); + nal_data->ctthread_flag = GMNAL_THREAD_RESET; + CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data); + return(GMNAL_STATUS_OK); +} + + +/* + * process a receive event + */ +int gmnal_rx_thread(void *arg) +{ + gmnal_data_t *nal_data; + gm_recv_event_t *rxevent = NULL; + gm_recv_t *recv = NULL; + void *buffer; + gmnal_rxtwe_t *we = NULL; + + if (!arg) { + CDEBUG(D_TRACE, "NO nal_data. 
Exiting\n"); + return(-1); + } + + nal_data = (gmnal_data_t*)arg; + CDEBUG(D_TRACE, "nal_data is [%p]\n", arg); + + daemonize(); + /* + * set 1 bit for each thread started + * doesn't matter which bit + */ + spin_lock(&nal_data->rxthread_flag_lock); + if (nal_data->rxthread_flag) + nal_data->rxthread_flag=nal_data->rxthread_flag*2 + 1; + else + nal_data->rxthread_flag = 1; + CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag); + spin_unlock(&nal_data->rxthread_flag_lock); + + while(nal_data->rxthread_stop_flag != GMNAL_THREAD_STOP) { + CDEBUG(D_NET, "RXTHREAD:: Receive thread waiting\n"); + we = gmnal_get_rxtwe(nal_data); + if (!we) { + CDEBUG(D_INFO, "Receive thread time to exit\n"); + break; + } + rxevent = we->rx; + CDEBUG(D_INFO, "thread got [%s]\n", gmnal_rxevent(rxevent)); + recv = (gm_recv_t*)&(rxevent->recv); + buffer = gm_ntohp(recv->buffer); + PORTAL_FREE(we, sizeof(gmnal_rxtwe_t)); + + switch(((gmnal_msghdr_t*)buffer)->type) { + case(GMNAL_SMALL_MESSAGE): + gmnal_pre_receive(nal_data, recv, + GMNAL_SMALL_MESSAGE); + break; + case(GMNAL_LARGE_MESSAGE_INIT): + gmnal_pre_receive(nal_data, recv, + GMNAL_LARGE_MESSAGE_INIT); + break; + case(GMNAL_LARGE_MESSAGE_ACK): + gmnal_pre_receive(nal_data, recv, + GMNAL_LARGE_MESSAGE_ACK); + break; + default: + CDEBUG(D_ERROR, "Unsupported message type\n"); + gmnal_rx_bad(nal_data, recv, NULL); + } + } + + spin_lock(&nal_data->rxthread_flag_lock); + nal_data->rxthread_flag/=2; + CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag); + spin_unlock(&nal_data->rxthread_flag_lock); + CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data); + return(GMNAL_STATUS_OK); +} + + + +/* + * Start processing a small message receive + * Get here from gmnal_receive_thread + * Hand off to lib_parse, which calls cb_recv + * which hands back to gmnal_small_receive + * Deal with all endian stuff here. 
+ */ +int +gmnal_pre_receive(gmnal_data_t *nal_data, gm_recv_t *recv, int gmnal_type) +{ + gmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + unsigned int snode, sport, type, length; + gmnal_msghdr_t *gmnal_msghdr; + ptl_hdr_t *portals_hdr; + + CDEBUG(D_INFO, "nal_data [%p], recv [%p] type [%d]\n", + nal_data, recv, gmnal_type); + + buffer = gm_ntohp(recv->buffer);; + snode = (int)gm_ntoh_u16(recv->sender_node_id); + sport = (int)gm_ntoh_u8(recv->sender_port_id); + type = (int)gm_ntoh_u8(recv->type); + buffer = gm_ntohp(recv->buffer); + length = (int) gm_ntohl(recv->length); + + gmnal_msghdr = (gmnal_msghdr_t*)buffer; + portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE); + + CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], + type [%d], length [%d], buffer [%p]\n", + snode, sport, type, length, buffer); + CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], + gmnal_type [%d]\n", gmnal_msghdr->sender_node_id, + gmnal_msghdr->magic, gmnal_msghdr->type); + CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], + dest_node ["LPD64"]\n", portals_hdr->src_nid, + portals_hdr->dest_nid); + + + /* + * Get a receive descriptor for this message + */ + srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer); + CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n"); + srxd->nal_data = nal_data; + if (!srxd) { + CDEBUG(D_ERROR, "Failed to get receive descriptor\n"); + lib_parse(nal_data->nal_cb, portals_hdr, srxd); + return(GMNAL_STATUS_FAIL); + } + + /* + * no need to bother portals library with this + */ + if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) { + gmnal_large_tx_ack_received(nal_data, srxd); + return(GMNAL_STATUS_OK); + } + + srxd->type = gmnal_type; + srxd->nsiov = gmnal_msghdr->niov; + srxd->gm_source_node = gmnal_msghdr->sender_node_id; + + CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n", + buffer+GMNAL_MSGHDR_SIZE); + /* + * control passes to lib, which calls cb_recv + * cb_recv is responsible for returning the buffer + * for future receive + */ + lib_parse(nal_data->nal_cb, portals_hdr, srxd); + + return(GMNAL_STATUS_OK); +} + + + +/* + * After a receive has been processed, + * hang out the receive buffer again. + * This implicitly returns a receive token. + */ +int +gmnal_rx_requeue_buffer(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) +{ + CDEBUG(D_TRACE, "gmnal_rx_requeue_buffer\n"); + + CDEBUG(D_NET, "requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data); + + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0 ); + GMNAL_GM_UNLOCK(nal_data); + + return(GMNAL_STATUS_OK); +} + + +/* + * Handle a bad message + * A bad message is one we don't expect or can't interpret + */ +int +gmnal_rx_bad(gmnal_data_t *nal_data, gm_recv_t *recv, gmnal_srxd_t *srxd) +{ + CDEBUG(D_TRACE, "Can't handle message\n"); + + if (!srxd) + srxd = gmnal_rxbuffer_to_srxd(nal_data, + gm_ntohp(recv->buffer)); + if (srxd) { + gmnal_rx_requeue_buffer(nal_data, srxd); + } else { + CDEBUG(D_ERROR, "Can't find a descriptor for this buffer\n"); + /* + * get rid of it ? + */ + return(GMNAL_STATUS_FAIL); + } + + return(GMNAL_STATUS_OK); +} + + + +/* + * Process a small message receive. 
+ * Get here from gmnal_receive_thread, gmnal_pre_receive + * lib_parse, cb_recv + * Put data from prewired receive buffer into users buffer(s) + * Hang out the receive buffer again for another receive + * Call lib_finalize + */ +int +gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) +{ + gmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; + + + CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen); + + if (!private) { + CDEBUG(D_ERROR, "gmnal_small_rx no context\n"); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + } + + srxd = (gmnal_srxd_t*)private; + buffer = srxd->buffer; + buffer += sizeof(gmnal_msghdr_t); + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov, + iov->iov_len); + gm_bcopy(buffer, iov->iov_base, iov->iov_len); + buffer += iov->iov_len; + iov++; + } + + + /* + * let portals library know receive is complete + */ + CDEBUG(D_PORTALS, "calling lib_finalize\n"); + if (lib_finalize(nal_cb, private, cookie) != PTL_OK) { + /* TO DO what to do with failed lib_finalise? */ + CDEBUG(D_INFO, "lib_finalize failed\n"); + } + /* + * return buffer so it can be used again + */ + CDEBUG(D_NET, "calling gm_provide_receive_buffer\n"); + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0); + GMNAL_GM_UNLOCK(nal_data); + + return(PTL_OK); +} + + +/* + * Start a small transmit. + * Get a send token (and wired transmit buffer). + * Copy data from senders buffer to wired buffer and + * initiate gm_send from the wired buffer. + * The callback function informs when the send is complete. 
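+ *
+ * In outline (names as used in the function body below):
+ *
+ *     gm_global_id_to_node_id()  - map the portals nid to a GM node id
+ *     gmnal_get_stxd()           - send token plus wired transmit buffer
+ *     build gmnal_msghdr_t + ptl_hdr_t + data in stxd->buffer
+ *     gm_send_to_peer_with_callback(..., gmnal_small_tx_callback, stxd)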
+ */ +int +gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, int size) +{ + gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data; + gmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + gmnal_msghdr_t *msghdr = NULL; + int tot_size = 0; + unsigned int local_nid; + gm_status_t gm_status = GM_SUCCESS; + + CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] + hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] + iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, + global_nid, pid, niov, iov, size); + + CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n", + hdr->dest_nid, hdr->src_nid); + + if (!nal_data) { + CDEBUG(D_ERROR, "no nal_data\n"); + return(GMNAL_STATUS_FAIL); + } else { + CDEBUG(D_INFO, "nal_data [%p]\n", nal_data); + } + + GMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, + &local_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "Failed to obtain local id\n"); + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid); + + stxd = gmnal_get_stxd(nal_data, 1); + CDEBUG(D_INFO, "stxd [%p]\n", stxd); + + stxd->type = GMNAL_SMALL_MESSAGE; + stxd->cookie = cookie; + + /* + * Copy gmnal_msg_hdr and portals header to the transmit buffer + * Then copy the data in + */ + buffer = stxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + + msghdr->magic = GMNAL_MAGIC; + msghdr->type = GMNAL_SMALL_MESSAGE; + msghdr->sender_node_id = nal_data->gm_global_nid; + CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); + + buffer += sizeof(gmnal_msghdr_t); + + CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer); + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n", + iov, iov->iov_len, buffer); + gm_bcopy(iov->iov_base, buffer, iov->iov_len); + buffer+= iov->iov_len; + iov++; + } + + CDEBUG(D_INFO, "sending\n"); + tot_size = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t); + stxd->msg_size = tot_size; + + + CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] + gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] + stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, + stxd->msg_size, global_nid, local_nid, stxd); + + GMNAL_GM_LOCK(nal_data); + stxd->gm_priority = GM_LOW_PRIORITY; + stxd->gm_target_node = local_nid; + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, + stxd->gm_size, stxd->msg_size, + GM_LOW_PRIORITY, local_nid, + gmnal_small_tx_callback, (void*)stxd); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "done\n"); + + return(PTL_OK); +} + + +/* + * A callback to indicate the small transmit operation is compete + * Check for erros and try to deal with them. + * Call lib_finalise to inform the client application that the send + * is complete and the memory can be reused. 
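+ * The outcome classes handled below are, roughly:
+ *     GM_SUCCESS                       - fall through; small sends are
+ *                                        lib_finalize()d, stxd returned
+ *     GM_SEND_DROPPED                  - resend the same stxd straight away
+ *     GM_TIMED_OUT / GM_SEND_TIMED_OUT - gm_drop_sends(); the resend happens
+ *                                        in gmnal_drop_sends_callback()
+ *     any other status                 - logged, then treated as complete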
+ * Return the stxd when finished with it (returns a send token) + */ +void +gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) +{ + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + lib_msg_t *cookie = stxd->cookie; + gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; + nal_cb_t *nal_cb = nal_data->nal_cb; + + if (!stxd) { + CDEBUG(D_TRACE, "send completion event for unknown stxd\n"); + return; + } + if (status != GM_SUCCESS) { + CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n", + stxd, gmnal_gm_error(status)); + } + + switch(status) { + case(GM_SUCCESS): + break; + + + + case(GM_SEND_DROPPED): + /* + * do a resend on the dropped ones + */ + CDEBUG(D_ERROR, "send stxd [%p] was dropped + resending\n", context); + GMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(nal_data->gm_port, + stxd->buffer, + stxd->gm_size, + stxd->msg_size, + stxd->gm_priority, + stxd->gm_target_node, + gmnal_small_tx_callback, + context); + GMNAL_GM_UNLOCK(nal_data); + + return; + case(GM_TIMED_OUT): + case(GM_SEND_TIMED_OUT): + /* + * drop these ones + */ + CDEBUG(D_INFO, "calling gm_drop_sends\n"); + GMNAL_GM_LOCK(nal_data); + gm_drop_sends(nal_data->gm_port, stxd->gm_priority, + stxd->gm_target_node, GMNAL_GM_PORT, + gmnal_drop_sends_callback, context); + GMNAL_GM_UNLOCK(nal_data); + + return; + + + /* + * abort on these ? + */ + case(GM_TRY_AGAIN): + case(GM_INTERRUPTED): + case(GM_FAILURE): + case(GM_INPUT_BUFFER_TOO_SMALL): + case(GM_OUTPUT_BUFFER_TOO_SMALL): + case(GM_BUSY): + case(GM_MEMORY_FAULT): + case(GM_INVALID_PARAMETER): + case(GM_OUT_OF_MEMORY): + case(GM_INVALID_COMMAND): + case(GM_PERMISSION_DENIED): + case(GM_INTERNAL_ERROR): + case(GM_UNATTACHED): + case(GM_UNSUPPORTED_DEVICE): + case(GM_SEND_REJECTED): + case(GM_SEND_TARGET_PORT_CLOSED): + case(GM_SEND_TARGET_NODE_UNREACHABLE): + case(GM_SEND_PORT_CLOSED): + case(GM_NODE_ID_NOT_YET_SET): + case(GM_STILL_SHUTTING_DOWN): + case(GM_CLONE_BUSY): + case(GM_NO_SUCH_DEVICE): + case(GM_ABORTED): + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + case(GM_UNTRANSLATED_SYSTEM_ERROR): + case(GM_ACCESS_DENIED): + case(GM_NO_DRIVER_SUPPORT): + case(GM_PTE_REF_CNT_OVERFLOW): + case(GM_NOT_SUPPORTED_IN_KERNEL): + case(GM_NOT_SUPPORTED_ON_ARCH): + case(GM_NO_MATCH): + case(GM_USER_ERROR): + case(GM_DATA_CORRUPTED): + case(GM_HARDWARE_FAULT): + case(GM_SEND_ORPHANED): + case(GM_MINOR_OVERFLOW): + case(GM_PAGE_TABLE_FULL): + case(GM_UC_ERROR): + case(GM_INVALID_PORT_NUMBER): + case(GM_DEV_NOT_FOUND): + case(GM_FIRMWARE_NOT_RUNNING): + case(GM_YP_NO_MATCH): + default: + CDEBUG(D_ERROR, "Unknown send error\n"); + } + if (stxd->type == GMNAL_LARGE_MESSAGE_INIT) { + CDEBUG(D_INFO, "large transmit done\n"); + return; + } + gmnal_return_stxd(nal_data, stxd); + if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) { + CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", + stxd); + } + return; +} + + + +void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, + gm_status_t status) +{ + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + gmnal_data_t *nal_data = stxd->nal_data; + + CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context); + if (status == GM_SUCCESS) { + GMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(gm_port, stxd->buffer, + stxd->gm_size, stxd->msg_size, + stxd->gm_priority, + stxd->gm_target_node, + gmnal_small_tx_callback, + context); + GMNAL_GM_LOCK(nal_data); + } else { + CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is + [%d][%s]\n", stxd, status, gmnal_gm_error(status)); + } + 
+ + return; +} + + +/* + * Begine a large transmit. + * Do a gm_register of the memory pointed to by the iovec + * and send details to the receiver. The receiver does a gm_get + * to pull the data and sends and ack when finished. Upon receipt of + * this ack, deregister the memory. Only 1 send token is required here. + */ +int +gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, + unsigned int niov, struct iovec *iov, int size) +{ + + gmnal_data_t *nal_data; + gmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + gmnal_msghdr_t *msghdr = NULL; + unsigned int local_nid; + int mlen = 0; /* the size of the init message data */ + struct iovec *iov_dup = NULL; + gm_status_t gm_status; + int niov_dup; + + + CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] + hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], + iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type, + global_nid, pid, niov, iov, size); + + if (nal_cb) + nal_data = (gmnal_data_t*)nal_cb->nal_data; + else { + CDEBUG(D_ERROR, "no nal_cb.\n"); + return(GMNAL_STATUS_FAIL); + } + + + /* + * Get stxd and buffer. Put local address of data in buffer, + * send local addresses to target, + * wait for the target node to suck the data over. + * The stxd is used to ren + */ + stxd = gmnal_get_stxd(nal_data, 1); + CDEBUG(D_INFO, "stxd [%p]\n", stxd); + + stxd->type = GMNAL_LARGE_MESSAGE_INIT; + stxd->cookie = cookie; + + /* + * Copy gmnal_msg_hdr and portals header to the transmit buffer + * Then copy the iov in + */ + buffer = stxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + + CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); + + msghdr->magic = GMNAL_MAGIC; + msghdr->type = GMNAL_LARGE_MESSAGE_INIT; + msghdr->sender_node_id = nal_data->gm_global_nid; + msghdr->stxd = stxd; + msghdr->niov = niov ; + buffer += sizeof(gmnal_msghdr_t); + mlen = sizeof(gmnal_msghdr_t); + CDEBUG(D_INFO, "mlen is [%d]\n", mlen); + + + CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer); + + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + buffer += sizeof(ptl_hdr_t); + mlen += sizeof(ptl_hdr_t); + CDEBUG(D_INFO, "mlen is [%d]\n", mlen); + + /* + * copy the iov to the buffer so target knows + * where to get the data from + */ + CDEBUG(D_INFO, "processing iov to [%p]\n", buffer); + gm_bcopy(iov, buffer, niov*sizeof(struct iovec)); + mlen += niov*(sizeof(struct iovec)); + CDEBUG(D_INFO, "mlen is [%d]\n", mlen); + + + /* + * Store the iovs in the stxd for we can get + * them later if we need them + */ + CDEBUG(D_NET, "Copying iov [%p] to [%p]\n", iov, stxd->iov); + gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec)); + stxd->niov = niov; + + + /* + * register the memory so the NIC can get hold of the data + * This is a slow process. it'd be good to overlap it + * with something else. 
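+ * (gm_register_memory() is assumed here to make each iovec element
+ * addressable by the NIC for the remote gm_get; on a partial failure the
+ * loop below deregisters whatever was already registered, hands the stxd
+ * back and fails the send.)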
+ */ + iov_dup = iov; + niov_dup = niov; + while(niov--) { + CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", + iov->iov_base, iov->iov_len); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, + iov->iov_base, iov->iov_len); + if (gm_status != GM_SUCCESS) { + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] + for memory [%p] len ["LPSZ"]\n", + gm_status, gmnal_gm_error(gm_status), + iov->iov_base, iov->iov_len); + GMNAL_GM_LOCK(nal_data); + while (iov_dup != iov) { + gm_deregister_memory(nal_data->gm_port, + iov_dup->iov_base, + iov_dup->iov_len); + iov_dup++; + } + GMNAL_GM_UNLOCK(nal_data); + gmnal_return_stxd(nal_data, stxd); + return(PTL_FAIL); + } + + GMNAL_GM_UNLOCK(nal_data); + iov++; + } + + /* + * Send the init message to the target + */ + CDEBUG(D_INFO, "sending mlen [%d]\n", mlen); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, + &local_nid); + if (gm_status != GM_SUCCESS) { + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_ERROR, "Failed to obtain local id\n"); + gmnal_return_stxd(nal_data, stxd); + /* TO DO deregister memory on failure */ + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_INFO, "Local Node_id is [%d]\n", local_nid); + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, + stxd->gm_size, mlen, GM_LOW_PRIORITY, + local_nid, gmnal_large_tx_callback, + (void*)stxd); + GMNAL_GM_UNLOCK(nal_data); + + CDEBUG(D_INFO, "done\n"); + + return(PTL_OK); +} + +/* + * Callback function indicates that send of buffer with + * large message iovec has completed (or failed). + */ +void +gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) +{ + gmnal_small_tx_callback(gm_port, context, status); + +} + + + +/* + * Have received a buffer that contains an iovec of the sender. + * Do a gm_register_memory of the receivers buffer and then do a get + * data from the sender. 
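+ *
+ * Taken together with gmnal_large_tx() above, the large-message exchange is:
+ *
+ *     sender:   register the source iovec memory, send a
+ *               GMNAL_LARGE_MESSAGE_INIT carrying the iovec list
+ *     receiver: register its own buffers (here), gm_get() the data across,
+ *               then send a GMNAL_LARGE_MESSAGE_ACK using the stxd address
+ *               remembered in srxd->source_stxd
+ *     sender:   on the ack, deregister the memory and complete the send
+ *               (gmnal_large_tx_ack_received(), declared in gmnal.h)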
+ */ +int +gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, + unsigned int nriov, struct iovec *riov, size_t mlen, + size_t rlen) +{ + gmnal_data_t *nal_data = nal_cb->nal_data; + gmnal_srxd_t *srxd = (gmnal_srxd_t*)private; + void *buffer = NULL; + struct iovec *riov_dup; + int nriov_dup; + gmnal_msghdr_t *msghdr = NULL; + gm_status_t gm_status; + + CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], + cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n", + nal_cb, private, cookie, nriov, riov, mlen, rlen); + + if (!srxd) { + CDEBUG(D_ERROR, "gmnal_large_rx no context\n"); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + } + + buffer = srxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + buffer += sizeof(gmnal_msghdr_t); + buffer += sizeof(ptl_hdr_t); + + /* + * Store the senders stxd address in the srxd for this message + * The gmnal_large_message_ack needs it to notify the sender + * the pull of data is complete + */ + srxd->source_stxd = msghdr->stxd; + + /* + * Register the receivers memory + * get the data, + * tell the sender that we got the data + * then tell the receiver we got the data + */ + nriov_dup = nriov; + riov_dup = riov; + while(nriov--) { + CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", + riov->iov_base, riov->iov_len); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, + riov->iov_base, riov->iov_len); + if (gm_status != GM_SUCCESS) { + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] + for memory [%p] len ["LPSZ"]\n", + gm_status, gmnal_gm_error(gm_status), + riov->iov_base, riov->iov_len); + GMNAL_GM_LOCK(nal_data); + while (riov_dup != riov) { + gm_deregister_memory(nal_data->gm_port, + riov_dup->iov_base, + riov_dup->iov_len); + riov_dup++; + } + GMNAL_GM_LOCK(nal_data); + /* + * give back srxd and buffer. Send NACK to sender + */ + return(PTL_FAIL); + } + GMNAL_GM_UNLOCK(nal_data); + riov++; + } + /* + * do this so the final gm_get callback can deregister the memory + */ + PORTAL_ALLOC(srxd->riov, nriov_dup*(sizeof(struct iovec))); + gm_bcopy(riov_dup, srxd->riov, nriov_dup*(sizeof(struct iovec))); + srxd->nriov = nriov_dup; + + /* + * now do gm_get to get the data + */ + srxd->cookie = cookie; + if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer, + nriov_dup, riov_dup) != GMNAL_STATUS_OK) { + CDEBUG(D_ERROR, "can't get the data"); + } + + CDEBUG(D_INFO, "lgmanl_large_rx done\n"); + + return(PTL_OK); +} + + +/* + * Perform a number of remote gets as part of receiving + * a large message. + * The final one to complete (i.e. the last callback to get called) + * tidies up. + * gm_get requires a send token. 
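+ *
+ * gmnal_copyiov() is called twice: a counting pass (do_copy == 0) that only
+ * works out how many gm_get()s the iovec overlap needs, so srxd->ncallbacks
+ * can be set before any callback fires, then a second pass (do_copy == 1)
+ * that actually issues the gm_get()s.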
+ */ +int +gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov, + int nriov, struct iovec *riov) +{ + + int ncalls = 0; + + CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], + nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov); + + + ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov); + if (ncalls < 0) { + CDEBUG(D_ERROR, "there's something wrong with the iovecs\n"); + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_INFO, "gmnal_remote_get ncalls [%d]\n", ncalls); + spin_lock_init(&srxd->callback_lock); + srxd->ncallbacks = ncalls; + srxd->callback_status = 0; + + ncalls = gmnal_copyiov(1, srxd, nsiov, siov, nriov, riov); + if (ncalls < 0) { + CDEBUG(D_ERROR, "there's something wrong with the iovecs\n"); + return(GMNAL_STATUS_FAIL); + } + + return(GMNAL_STATUS_OK); + +} + + +/* + * pull data from source node (source iovec) to a local iovec. + * The iovecs may not match which adds the complications below. + * Count the number of gm_gets that will be required to the callbacks + * can determine who is the last one. + */ +int +gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov, + struct iovec *siov, int nriov, struct iovec *riov) +{ + + int ncalls = 0; + int slen = siov->iov_len, rlen = riov->iov_len; + char *sbuf = siov->iov_base, *rbuf = riov->iov_base; + unsigned long sbuf_long; + gm_remote_ptr_t remote_ptr = 0; + unsigned int source_node; + gmnal_stxd_t *stxd = NULL; + gmnal_data_t *nal_data = srxd->nal_data; + + CDEBUG(D_TRACE, "copy[%d] nal_data[%p]\n", do_copy, nal_data); + if (do_copy) { + if (!nal_data) { + CDEBUG(D_ERROR, "Bad args No nal_data\n"); + return(GMNAL_STATUS_FAIL); + } + GMNAL_GM_LOCK(nal_data); + if (gm_global_id_to_node_id(nal_data->gm_port, + srxd->gm_source_node, + &source_node) != GM_SUCCESS) { + + CDEBUG(D_ERROR, "cannot resolve global_id [%u] + to local node_id\n", srxd->gm_source_node); + GMNAL_GM_UNLOCK(nal_data); + return(GMNAL_STATUS_FAIL); + } + GMNAL_GM_UNLOCK(nal_data); + /* + * We need a send token to use gm_get + * getting an stxd gets us a send token. + * the stxd is used as the context to the + * callback function (so stxd can be returned). 
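The count-then-issue scheme above (run the same iovec walk once with do_copy == 0 to learn how many gm_get() calls there will be, then again with do_copy == 1 to issue them) only works because both passes split the buffers identically. The splitting rule is small enough to show on its own; the sketch below only counts, uses plain libc, and adds end-of-array guards that the kernel code leaves implicit. It assumes both iovecs describe the same total number of bytes and have at least one element each:

#include <stdio.h>
#include <sys/uio.h>

static int
chunk_count(const struct iovec *siov, const struct iovec *riov, int nriov)
{
        size_t slen = siov->iov_len, rlen = riov->iov_len;
        int    ncalls = 0;

        while (nriov > 0) {
                ncalls++;                       /* one gm_get() per overlap */
                if (slen > rlen) {              /* receive element exhausted first */
                        slen -= rlen;
                        riov++;
                        if (--nriov > 0)
                                rlen = riov->iov_len;
                } else if (rlen > slen) {       /* source element exhausted first */
                        rlen -= slen;
                        siov++;
                        slen = siov->iov_len;
                } else {                        /* both exhausted together */
                        siov++;
                        riov++;
                        if (--nriov > 0) {
                                slen = siov->iov_len;
                                rlen = riov->iov_len;
                        }
                }
        }
        return ncalls;
}

int main(void)
{
        char s[100], r[100];
        struct iovec siov[2] = { { s, 60 }, { s + 60, 40 } };
        struct iovec riov[3] = { { r, 30 }, { r + 30, 30 }, { r + 60, 40 } };

        printf("%d transfers\n", chunk_count(siov, riov, 3));   /* prints 3 */
        return 0;
}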
+ * Set pointer in stxd to srxd so callback count in srxd + * can be decremented to find last callback to complete + */ + stxd = gmnal_get_stxd(nal_data, 1); + stxd->srxd = srxd; + CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n", + srxd->gm_source_node, source_node); + } + + do { + CDEBUG(D_INFO, "sbuf[%p] slen[%d] rbuf[%p], rlen[%d]\n", + sbuf, slen, rbuf, rlen); + if (slen > rlen) { + ncalls++; + if (do_copy) { + CDEBUG(D_INFO, "slen>rlen\n"); + GMNAL_GM_LOCK(nal_data); + /* + * funny business to get rid + * of compiler warning + */ + sbuf_long = (unsigned long) sbuf; + remote_ptr = (gm_remote_ptr_t)sbuf_long; + gm_get(nal_data->gm_port, remote_ptr, rbuf, + rlen, GM_LOW_PRIORITY, source_node, + GMNAL_GM_PORT, + gmnal_remote_get_callback, stxd); + GMNAL_GM_UNLOCK(nal_data); + } + /* + * at the end of 1 iov element + */ + sbuf+=rlen; + slen-=rlen; + riov++; + nriov--; + rbuf = riov->iov_base; + rlen = riov->iov_len; + } else if (rlen > slen) { + ncalls++; + if (do_copy) { + CDEBUG(D_INFO, "slengm_port, remote_ptr, rbuf, + slen, GM_LOW_PRIORITY, source_node, + GMNAL_GM_PORT, + gmnal_remote_get_callback, stxd); + GMNAL_GM_UNLOCK(nal_data); + } + /* + * at end of siov element + */ + rbuf+=slen; + rlen-=slen; + siov++; + sbuf = siov->iov_base; + slen = siov->iov_len; + } else { + ncalls++; + if (do_copy) { + CDEBUG(D_INFO, "rlen=slen\n"); + GMNAL_GM_LOCK(nal_data); + sbuf_long = (unsigned long) sbuf; + remote_ptr = (gm_remote_ptr_t)sbuf_long; + gm_get(nal_data->gm_port, remote_ptr, rbuf, + rlen, GM_LOW_PRIORITY, source_node, + GMNAL_GM_PORT, + gmnal_remote_get_callback, stxd); + GMNAL_GM_UNLOCK(nal_data); + } + /* + * at end of siov and riov element + */ + siov++; + sbuf = siov->iov_base; + slen = siov->iov_len; + riov++; + nriov--; + rbuf = riov->iov_base; + rlen = riov->iov_len; + } + + } while (nriov); + return(ncalls); +} + + +/* + * The callback function that is invoked after each gm_get call completes. + * Multiple callbacks may be invoked for 1 transaction, only the final + * callback has work to do. 
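The rlen > slen branch above appears garbled; by symmetry with the two branches around it, it presumably pulls slen bytes with gm_get() and then steps to the next source element, roughly:

                } else if (rlen > slen) {
                        ncalls++;
                        if (do_copy) {
                                CDEBUG(D_INFO, "slen < rlen\n");
                                GMNAL_GM_LOCK(nal_data);
                                sbuf_long = (unsigned long) sbuf;
                                remote_ptr = (gm_remote_ptr_t)sbuf_long;
                                gm_get(nal_data->gm_port, remote_ptr, rbuf,
                                       slen, GM_LOW_PRIORITY, source_node,
                                       GMNAL_GM_PORT,
                                       gmnal_remote_get_callback, stxd);
                                GMNAL_GM_UNLOCK(nal_data);
                        }
                        /*
                         * at end of siov element
                         */
                        rbuf += slen;
                        rlen -= slen;
                        siov++;
                        sbuf = siov->iov_base;
                        slen = siov->iov_len;
                }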
+ */ +void +gmnal_remote_get_callback(gm_port_t *gm_port, void *context, + gm_status_t status) +{ + + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + gmnal_srxd_t *srxd = stxd->srxd; + nal_cb_t *nal_cb = srxd->nal_data->nal_cb; + int lastone; + struct iovec *riov; + int nriov; + gmnal_data_t *nal_data; + + CDEBUG(D_TRACE, "called for context [%p]\n", context); + + if (status != GM_SUCCESS) { + CDEBUG(D_ERROR, "reports error [%d][%s]\n", status, + gmnal_gm_error(status)); + } + + spin_lock(&srxd->callback_lock); + srxd->ncallbacks--; + srxd->callback_status |= status; + lastone = srxd->ncallbacks?0:1; + spin_unlock(&srxd->callback_lock); + nal_data = srxd->nal_data; + + /* + * everyone returns a send token + */ + gmnal_return_stxd(nal_data, stxd); + + if (!lastone) { + CDEBUG(D_ERROR, "NOT final callback context[%p]\n", srxd); + return; + } + + /* + * Let our client application proceed + */ + CDEBUG(D_ERROR, "final callback context[%p]\n", srxd); + if (lib_finalize(nal_cb, srxd, srxd->cookie) != PTL_OK) { + CDEBUG(D_INFO, "Call to lib_finalize failed for srxd [%p]\n", + srxd); + } + + /* + * send an ack to the sender to let him know we got the data + */ + gmnal_large_tx_ack(nal_data, srxd); + + /* + * Unregister the memory that was used + * This is a very slow business (slower then register) + */ + nriov = srxd->nriov; + riov = srxd->riov; + GMNAL_GM_LOCK(nal_data); + while (nriov--) { + CDEBUG(D_ERROR, "deregister memory [%p]\n", riov->iov_base); + if (gm_deregister_memory(srxd->nal_data->gm_port, + riov->iov_base, riov->iov_len)) { + CDEBUG(D_ERROR, "failed to deregister memory [%p]\n", + riov->iov_base); + } + riov++; + } + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(srxd->riov, sizeof(struct iovec)*nriov); + + /* + * repost the receive buffer (return receive token) + */ + GMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0); + GMNAL_GM_UNLOCK(nal_data); + + return; +} + + +/* + * Called on target node. + * After pulling data from a source node + * send an ack message to indicate the large transmit is complete. + */ +void +gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) +{ + + gmnal_stxd_t *stxd; + gmnal_msghdr_t *msghdr; + void *buffer = NULL; + unsigned int local_nid; + gm_status_t gm_status = GM_SUCCESS; + + CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd, + srxd->gm_source_node); + + GMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, + srxd->gm_source_node, &local_nid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "Failed to obtain local id\n"); + return; + } + CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid); + + stxd = gmnal_get_stxd(nal_data, 1); + CDEBUG(D_TRACE, "gmnal_large_tx_ack got stxd[%p]\n", stxd); + + stxd->nal_data = nal_data; + stxd->type = GMNAL_LARGE_MESSAGE_ACK; + + /* + * Copy gmnal_msg_hdr and portals header to the transmit buffer + * Then copy the data in + */ + buffer = stxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + + /* + * Add in the address of the original stxd from the sender node + * so it knows which thread to notify. 
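The callback above is one instance of a "last one out tidies up" protocol: every completion decrements a counter under srxd->callback_lock, and only the call that reaches zero finalizes, acks the sender, deregisters and reposts the buffer. The core of that protocol, restated as a minimal sketch against the same srxd fields (the helper name is invented):

/* one call per completed gm_get(); only the final caller gets 1 back */
static int
gmnal_completion_is_last(gmnal_srxd_t *srxd, gm_status_t status)
{
        int last;

        spin_lock(&srxd->callback_lock);
        srxd->ncallbacks--;                     /* set up by gmnal_remote_get() */
        srxd->callback_status |= status;        /* GM_SUCCESS is 0, so any failure sticks */
        last = (srxd->ncallbacks == 0);
        spin_unlock(&srxd->callback_lock);

        return(last);                           /* non-last callers only return their stxd */
}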
+ */ + msghdr->magic = GMNAL_MAGIC; + msghdr->type = GMNAL_LARGE_MESSAGE_ACK; + msghdr->sender_node_id = nal_data->gm_global_nid; + msghdr->stxd = srxd->source_stxd; + CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer); + + CDEBUG(D_INFO, "sending\n"); + stxd->msg_size= sizeof(gmnal_msghdr_t); + + + CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] + gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] + stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, + stxd->msg_size, srxd->gm_source_node, local_nid, stxd); + GMNAL_GM_LOCK(nal_data); + stxd->gm_priority = GM_LOW_PRIORITY; + stxd->gm_target_node = local_nid; + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, + stxd->gm_size, stxd->msg_size, + GM_LOW_PRIORITY, local_nid, + gmnal_large_tx_ack_callback, + (void*)stxd); + + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "gmnal_large_tx_ack :: done\n"); + + return; +} + + +/* + * A callback to indicate the small transmit operation is compete + * Check for errors and try to deal with them. + * Call lib_finalise to inform the client application that the + * send is complete and the memory can be reused. + * Return the stxd when finished with it (returns a send token) + */ +void +gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context, + gm_status_t status) +{ + gmnal_stxd_t *stxd = (gmnal_stxd_t*)context; + gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data; + + if (!stxd) { + CDEBUG(D_ERROR, "send completion event for unknown stxd\n"); + return; + } + CDEBUG(D_TRACE, "send completion event for stxd [%p] status is [%d]\n", + stxd, status); + gmnal_return_stxd(stxd->nal_data, stxd); + + GMNAL_GM_UNLOCK(nal_data); + return; +} + +/* + * Indicates the large transmit operation is compete. + * Called on transmit side (means data has been pulled by receiver + * or failed). + * Call lib_finalise to inform the client application that the send + * is complete, deregister the memory and return the stxd. + * Finally, report the rx buffer that the ack message was delivered in. + */ +void +gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd) +{ + nal_cb_t *nal_cb = nal_data->nal_cb; + gmnal_stxd_t *stxd = NULL; + gmnal_msghdr_t *msghdr = NULL; + void *buffer = NULL; + struct iovec *iov; + + + CDEBUG(D_TRACE, "gmnal_large_tx_ack_received buffer [%p]\n", buffer); + + buffer = srxd->buffer; + msghdr = (gmnal_msghdr_t*)buffer; + stxd = msghdr->stxd; + + CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd); + + if (lib_finalize(nal_cb, stxd, stxd->cookie) != PTL_OK) { + CDEBUG(D_INFO, "Call to lib_finalize failed for stxd [%p]\n", + stxd); + } + + /* + * extract the iovec from the stxd, deregister the memory. + * free the space used to store the iovec + */ + iov = stxd->iov; + while(stxd->niov--) { + CDEBUG(D_INFO, "deregister memory [%p] size ["LPSZ"]\n", + iov->iov_base, iov->iov_len); + GMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, iov->iov_base, + iov->iov_len); + GMNAL_GM_UNLOCK(nal_data); + iov++; + } + + /* + * return the send token + * TO DO It is bad to hold onto the send token so long? 
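For orientation, the large-message exchange implemented by the functions above, end to end:

/*
 *   sender (gmnal_large_tx)                 receiver
 *   -----------------------                 --------
 *   register the send iovec
 *   send the large-message init;
 *   its header carries the sender's
 *   stxd pointer                       -->  gmnal_large_rx():
 *                                             save msghdr->stxd
 *                                             register the receive iovec
 *                                             gmnal_remote_get(): one gm_get()
 *                                               per overlapping iovec chunk
 *                                             last gmnal_remote_get_callback():
 *                                               lib_finalize(), deregister,
 *   gmnal_large_tx_ack_received():     <--      send GMNAL_LARGE_MESSAGE_ACK,
 *     lib_finalize(), deregister the            repost the receive buffer
 *     send iovec, return the stxd
 */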
+ */ + gmnal_return_stxd(nal_data, stxd); + + + /* + * requeue the receive buffer + */ + gmnal_rx_requeue_buffer(nal_data, srxd); + + + return; +} + + + + +EXPORT_SYMBOL(gmnal_rx_thread); +EXPORT_SYMBOL(gmnal_ct_thread); +EXPORT_SYMBOL(gmnal_pre_receive); +EXPORT_SYMBOL(gmnal_rx_requeue_buffer); +EXPORT_SYMBOL(gmnal_rx_bad); +EXPORT_SYMBOL(gmnal_small_rx); +EXPORT_SYMBOL(gmnal_large_tx); +EXPORT_SYMBOL(gmnal_large_tx_callback); +EXPORT_SYMBOL(gmnal_small_tx_callback); diff --git a/lustre/portals/knals/gmnal/gmnal_module.c b/lustre/portals/knals/gmnal/gmnal_module.c new file mode 100644 index 0000000..8e0f64c --- /dev/null +++ b/lustre/portals/knals/gmnal/gmnal_module.c @@ -0,0 +1,147 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "gmnal.h" + + +int gmnal_small_msg_size = 525312; +/* + * -1 indicates default value. + * This is 1 thread per cpu + * See start_kernel_threads + */ +int num_rx_threads = -1; +int num_stxds = 5; + +ptl_handle_ni_t kgmnal_ni; + + +int +gmnal_cmd(struct portal_ioctl_data *data, void *private) +{ + gmnal_data_t *nal_data = NULL; + char *name = NULL; + int nid = -2; + int gnid; + gm_status_t gm_status; + + + CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n", + data->ioc_nal_cmd, private); + nal_data = (gmnal_data_t*)private; + switch(data->ioc_nal_cmd) { + /* + * just reuse already defined GET_NID. 
Should define GMNAL version + */ + case(GMNAL_IOC_GET_GNID): + + PORTAL_ALLOC(name, data->ioc_plen1); + copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1); + + GMNAL_GM_LOCK(nal_data); + nid = gm_host_name_to_node_id(nal_data->gm_port, name); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "Local node id is [%d]\n", nid); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, + nid, &gnid); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n", + gm_status); + return(-1); + } + CDEBUG(D_INFO, "Global node is is [%u][%x]\n", gnid, gnid); + copy_to_user(data->ioc_pbuf2, &gnid, data->ioc_plen2); + break; + default: + CDEBUG(D_INFO, "gmnal_cmd UNKNOWN[%d]\n", data->ioc_nal_cmd); + data->ioc_nid2 = -1; + } + + + return(0); +} + + +static int __init +gmnal_load(void) +{ + int status; + CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n"); + + + + CDEBUG(D_INFO, "Calling gmnal_init\n"); + status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni); + if (status == PTL_OK) { + CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n"); + } else { + CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n"); + return(1); + + } + + CDEBUG(D_INFO, "Calling kportal_nal_register\n"); + /* + * global_nal_data is set by gmnal_init + */ + if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) { + CDEBUG(D_INFO, "kportal_nal_register failed\n"); + return(1); + } + + CDEBUG(D_INFO, "Calling PORTAL_SYMBOL_REGISTER\n"); + PORTAL_SYMBOL_REGISTER(kgmnal_ni); + CDEBUG(D_INFO, "This is the end of the gmnal init routine"); + + + return(0); +} + + +static void __exit +gmnal_unload(void) +{ + + kportal_nal_unregister(GMNAL); + PORTAL_SYMBOL_UNREGISTER(kgmnal_ni); + gmnal_fini(); + global_nal_data = NULL; + return; +} + + +module_init(gmnal_load); + +module_exit(gmnal_unload); + +EXPORT_SYMBOL(kgmnal_ni); + +MODULE_PARM(gmnal_small_msg_size, "i"); +MODULE_PARM(num_rx_threads, "i"); +MODULE_PARM(num_stxds, "i"); + +MODULE_AUTHOR("Morgan Doyle"); + +MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM."); + +MODULE_LICENSE("GPL"); diff --git a/lustre/portals/knals/gmnal/gmnal_utils.c b/lustre/portals/knals/gmnal/gmnal_utils.c new file mode 100644 index 0000000..84fc3a0 --- /dev/null +++ b/lustre/portals/knals/gmnal/gmnal_utils.c @@ -0,0 +1,1007 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2003 Los Alamos National Laboratory (LANL) + * + * This file is part of Lustre, http://www.lustre.org/ + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +/* + * All utilities required by lgmanl + */ + +#include "gmnal.h" + +/* + * Am I one of the gmnal rxthreads ? 
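The GMNAL_IOC_GET_GNID branch of gmnal_cmd() above takes the ioctl arguments on trust: the user-supplied length is not bounded, the copy_from_user()/copy_to_user() return values are ignored, and the name buffer is never freed. A rough sketch of a more defensive variant; the PAGE_SIZE bound, the errno-style return codes and the helper name are illustrative choices rather than anything taken from the patch:

static int
gmnal_get_gnid_checked(gmnal_data_t *nal_data, struct portal_ioctl_data *data)
{
        char        *name;
        int          nid, gnid;
        gm_status_t  gm_status;

        if (data->ioc_plen1 <= 0 || data->ioc_plen1 > PAGE_SIZE)
                return(-EINVAL);                /* bound the user-supplied length */

        PORTAL_ALLOC(name, data->ioc_plen1);
        if (name == NULL)
                return(-ENOMEM);

        if (copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1)) {
                PORTAL_FREE(name, data->ioc_plen1);
                return(-EFAULT);                /* bad user buffer */
        }
        name[data->ioc_plen1 - 1] = '\0';       /* force termination */

        GMNAL_GM_LOCK(nal_data);
        nid = gm_host_name_to_node_id(nal_data->gm_port, name);
        gm_status = gm_node_id_to_global_id(nal_data->gm_port, nid, &gnid);
        GMNAL_GM_UNLOCK(nal_data);
        PORTAL_FREE(name, data->ioc_plen1);

        if (gm_status != GM_SUCCESS)
                return(-ENXIO);
        if (copy_to_user(data->ioc_pbuf2, &gnid, sizeof(gnid)))
                return(-EFAULT);
        return(0);
}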
+ */ +int +gmnal_is_rxthread(gmnal_data_t *nal_data) +{ + int i; + for (i=0; irxthread_pid[i] == current->pid) + return(1); + } + return(0); +} + + +/* + * allocate a number of small tx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to send + * function. + */ +int +gmnal_alloc_stxd(gmnal_data_t *nal_data) +{ + int ntx = 0, nstx = 0, i = 0, nrxt_stx = 10; + gmnal_stxd_t *txd = NULL; + void *txbuffer = NULL; + + CDEBUG(D_TRACE, "gmnal_alloc_small tx\n"); + + GMNAL_GM_LOCK(nal_data); + ntx = gm_num_send_tokens(nal_data->gm_port); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "total number of send tokens available is [%d]\n", ntx); + + nstx = ntx/2; + /* + * num_stxds from gmnal_module.c + */ + nstx = num_stxds; + nrxt_stx = nstx + 1; + + CDEBUG(D_INFO, "Allocated [%d] send tokens to small messages\n", nstx); + + + /* + * A semaphore is initialised with the + * number of transmit tokens available. + * To get a stxd, acquire the token semaphore. + * this decrements the available token count + * (if no tokens you block here, someone returning a + * stxd will release the semaphore and wake you) + * When token is obtained acquire the spinlock + * to manipulate the list + */ + GMNAL_TXD_TOKEN_INIT(nal_data, nstx); + GMNAL_TXD_LOCK_INIT(nal_data); + GMNAL_RXT_TXD_TOKEN_INIT(nal_data, nrxt_stx); + GMNAL_RXT_TXD_LOCK_INIT(nal_data); + + for (i=0; i<=nstx; i++) { + PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t)); + if (!txd) { + CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i); + return(GMNAL_STATUS_NOMEM); + } + GMNAL_GM_LOCK(nal_data); + txbuffer = gm_dma_malloc(nal_data->gm_port, + GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_UNLOCK(nal_data); + if (!txbuffer) { + CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d], + size [%d]\n", i, + GMNAL_SMALL_MSG_SIZE(nal_data)); + PORTAL_FREE(txd, sizeof(gmnal_stxd_t)); + return(GMNAL_STATUS_FAIL); + } + txd->buffer = txbuffer; + txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data); + txd->gm_size = gm_min_size_for_length(txd->buffer_size); + txd->nal_data = (struct _gmnal_data_t*)nal_data; + txd->rxt = 0; + + txd->next = nal_data->stxd; + nal_data->stxd = txd; + CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], + size [%d]\n", txd, txd->buffer, txd->buffer_size); + } + + for (i=0; i<=nrxt_stx; i++) { + PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t)); + if (!txd) { + CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i); + return(GMNAL_STATUS_NOMEM); + } + GMNAL_GM_LOCK(nal_data); + txbuffer = gm_dma_malloc(nal_data->gm_port, + GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_UNLOCK(nal_data); + if (!txbuffer) { + CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d], + size [%d]\n", i, + GMNAL_SMALL_MSG_SIZE(nal_data)); + PORTAL_FREE(txd, sizeof(gmnal_stxd_t)); + return(GMNAL_STATUS_FAIL); + } + txd->buffer = txbuffer; + txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data); + txd->gm_size = gm_min_size_for_length(txd->buffer_size); + txd->nal_data = (struct _gmnal_data_t*)nal_data; + txd->rxt = 1; + + txd->next = nal_data->rxt_stxd; + nal_data->rxt_stxd = txd; + CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], + size [%d]\n", txd, txd->buffer, txd->buffer_size); + } + + return(GMNAL_STATUS_OK); +} + +/* Free the list of wired and gm_registered small tx buffers and + * the tx descriptors that go along with them. 
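The loop body of gmnal_is_rxthread() above appears garbled. Given the rxthread_pid[] array that gmnal_start_kernel_threads() fills in later in this file, it presumably just compares the current pid against each receiver thread's pid, roughly as below; the num_rx_threads bound is an assumption, and could equally be the compile-time size of the array:

        int i;

        for (i = 0; i < num_rx_threads; i++) {
                if (nal_data->rxthread_pid[i] == current->pid)
                        return(1);
        }
        return(0);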
+ */ +void +gmnal_free_stxd(gmnal_data_t *nal_data) +{ + gmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; + + CDEBUG(D_TRACE, "gmnal_free_small tx\n"); + + while(txd) { + CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], + size [%d]\n", txd, txd->buffer, txd->buffer_size); + _txd = txd; + txd = txd->next; + GMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _txd->buffer); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_txd, sizeof(gmnal_stxd_t)); + } + txd = nal_data->rxt_stxd; + while(txd) { + CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], + size [%d]\n", txd, txd->buffer, txd->buffer_size); + _txd = txd; + txd = txd->next; + GMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _txd->buffer); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_txd, sizeof(gmnal_stxd_t)); + } + return; +} + + +/* + * Get a txd from the list + * This get us a wired and gm_registered small tx buffer. + * This implicitly gets us a send token also. + */ +gmnal_stxd_t * +gmnal_get_stxd(gmnal_data_t *nal_data, int block) +{ + + gmnal_stxd_t *txd = NULL; + pid_t pid = current->pid; + + + CDEBUG(D_TRACE, "gmnal_get_stxd nal_data [%p] block[%d] pid [%d]\n", + nal_data, block, pid); + + if (gmnal_is_rxthread(nal_data)) { + CDEBUG(D_INFO, "RXTHREAD Attempting to get token\n"); + GMNAL_RXT_TXD_GETTOKEN(nal_data); + GMNAL_RXT_TXD_LOCK(nal_data); + txd = nal_data->rxt_stxd; + if (txd) + nal_data->rxt_stxd = txd->next; + GMNAL_RXT_TXD_UNLOCK(nal_data); + CDEBUG(D_INFO, "RXTHREAD got [%p], head is [%p]\n", + txd, nal_data->rxt_stxd); + txd->kniov = 0; + txd->rxt = 1; + } else { + if (block) { + CDEBUG(D_INFO, "Attempting to get token\n"); + GMNAL_TXD_GETTOKEN(nal_data); + CDEBUG(D_PORTALS, "Got token\n"); + } else { + if (GMNAL_TXD_TRYGETTOKEN(nal_data)) { + CDEBUG(D_ERROR, "can't get token\n"); + return(NULL); + } + } + GMNAL_TXD_LOCK(nal_data); + txd = nal_data->stxd; + if (txd) + nal_data->stxd = txd->next; + GMNAL_TXD_UNLOCK(nal_data); + CDEBUG(D_INFO, "got [%p], head is [%p]\n", txd, + nal_data->stxd); + txd->kniov = 0; + } /* general txd get */ + return(txd); +} + +/* + * Return a txd to the list + */ +void +gmnal_return_stxd(gmnal_data_t *nal_data, gmnal_stxd_t *txd) +{ + CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data, + txd, txd->rxt); + + /* + * this transmit descriptor is + * for the rxthread + */ + if (txd->rxt) { + GMNAL_RXT_TXD_LOCK(nal_data); + txd->next = nal_data->rxt_stxd; + nal_data->rxt_stxd = txd; + GMNAL_RXT_TXD_UNLOCK(nal_data); + GMNAL_RXT_TXD_RETURNTOKEN(nal_data); + CDEBUG(D_INFO, "Returned stxd to rxthread list\n"); + } else { + GMNAL_TXD_LOCK(nal_data); + txd->next = nal_data->stxd; + nal_data->stxd = txd; + GMNAL_TXD_UNLOCK(nal_data); + GMNAL_TXD_RETURNTOKEN(nal_data); + CDEBUG(D_INFO, "Returned stxd to general list\n"); + } + return; +} + + +/* + * allocate a number of small rx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to + * receive thread. 
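gmnal_get_stxd()/gmnal_return_stxd() above are a classic descriptor pool: a counting semaphore whose value equals the number of free descriptors, plus a spinlock that only protects the list head. A minimal sketch of the pair, with names invented for the sketch; the separate rxthread pool presumably exists so that a receive thread needing to transmit protocol traffic (such as a large-message ack) can never be starved by ordinary senders holding all the general tokens:

typedef struct stxd_pool {
        struct semaphore  token;        /* one unit per free descriptor */
        spinlock_t        lock;         /* guards only the list head */
        gmnal_stxd_t     *head;
} stxd_pool_t;

static gmnal_stxd_t *
pool_get(stxd_pool_t *p, int block)
{
        gmnal_stxd_t *txd;

        if (block)
                down(&p->token);                /* sleep until one is free */
        else if (down_trylock(&p->token))
                return(NULL);                   /* none free and caller won't wait */

        spin_lock(&p->lock);
        txd = p->head;                          /* the token guarantees txd != NULL */
        p->head = txd->next;
        spin_unlock(&p->lock);
        return(txd);
}

static void
pool_put(stxd_pool_t *p, gmnal_stxd_t *txd)
{
        spin_lock(&p->lock);
        txd->next = p->head;
        p->head = txd;
        spin_unlock(&p->lock);
        up(&p->token);                          /* wakes one waiter, if any */
}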
+ */ +int +gmnal_alloc_srxd(gmnal_data_t *nal_data) +{ + int nrx = 0, nsrx = 0, i = 0; + gmnal_srxd_t *rxd = NULL; + void *rxbuffer = NULL; + + CDEBUG(D_TRACE, "gmnal_alloc_small rx\n"); + + GMNAL_GM_LOCK(nal_data); + nrx = gm_num_receive_tokens(nal_data->gm_port); + GMNAL_GM_UNLOCK(nal_data); + CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n", + nrx); + + nsrx = nrx/2; + nsrx = 12; + /* + * make the number of rxds twice our total + * number of stxds plus 1 + */ + nsrx = num_stxds*2 + 2; + + CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n", + nsrx); + + + GMNAL_GM_LOCK(nal_data); + nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, + gm_hash_hash_ptr, 0, 0, nsrx, 0); + GMNAL_GM_UNLOCK(nal_data); + if (!nal_data->srxd_hash) { + CDEBUG(D_ERROR, "Failed to create hash table\n"); + return(GMNAL_STATUS_NOMEM); + } + + GMNAL_RXD_TOKEN_INIT(nal_data, nsrx); + GMNAL_RXD_LOCK_INIT(nal_data); + + for (i=0; i<=nsrx; i++) { + PORTAL_ALLOC(rxd, sizeof(gmnal_srxd_t)); + if (!rxd) { + CDEBUG(D_ERROR, "Failed to malloc rxd [%d]\n", i); + return(GMNAL_STATUS_NOMEM); + } +#if 0 + PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data)); + if (!rxbuffer) { + CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], + size [%d]\n", i, + GMNAL_SMALL_MSG_SIZE(nal_data)); + PORTAL_FREE(rxd, sizeof(gmnal_srxd_t)); + return(GMNAL_STATUS_FAIL); + } + CDEBUG(D_NET, "Calling gm_register_memory with port [%p] + rxbuffer [%p], size [%d]\n", nal_data->gm_port, + rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, + GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p], + index [%d]\n", rxbuffer, i); + switch(gm_status) { + case(GM_FAILURE): + CDEBUG(D_ERROR, "GM_FAILURE\n"); + break; + case(GM_PERMISSION_DENIED): + CDEBUG(D_ERROR, "PERMISSION_DENIED\n"); + break; + case(GM_INVALID_PARAMETER): + CDEBUG(D_ERROR, "INVALID_PARAMETER\n"); + break; + default: + CDEBUG(D_ERROR, "Unknown error[%d]\n", + gm_status); + break; + + } + return(GMNAL_STATUS_FAIL); + } +#else + GMNAL_GM_LOCK(nal_data); + rxbuffer = gm_dma_malloc(nal_data->gm_port, + GMNAL_SMALL_MSG_SIZE(nal_data)); + GMNAL_GM_UNLOCK(nal_data); + if (!rxbuffer) { + CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d], + size [%d]\n", i, + GMNAL_SMALL_MSG_SIZE(nal_data)); + PORTAL_FREE(rxd, sizeof(gmnal_srxd_t)); + return(GMNAL_STATUS_FAIL); + } +#endif + + rxd->buffer = rxbuffer; + rxd->size = GMNAL_SMALL_MSG_SIZE(nal_data); + rxd->gmsize = gm_min_size_for_length(rxd->size); + + if (gm_hash_insert(nal_data->srxd_hash, + (void*)rxbuffer, (void*)rxd)) { + + CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] + for rxbuffer[%p]\n", rxd, rxbuffer); + return(GMNAL_STATUS_FAIL); + } + + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], + size [%d]\n", rxd, rxd->buffer, rxd->size); + } + + return(GMNAL_STATUS_OK); +} + + + +/* Free the list of wired and gm_registered small rx buffers and the + * rx descriptors that go along with them. 
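The gm hash created above is purely a reverse map: the receive side gets a buffer address back from GM and needs the srxd that owns it (gmnal_rxbuffer_to_srxd() below is the lookup half). Pulled out of the surrounding code, the two halves are simply:

        /* at allocation time: one entry per DMA receive buffer */
        if (gm_hash_insert(nal_data->srxd_hash, (void *)rxbuffer, (void *)rxd))
                CDEBUG(D_ERROR, "gm_hash_insert failed for buffer [%p]\n", rxbuffer);

        /* on the receive path: buffer address back to its descriptor */
        srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer);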
+ */ +void +gmnal_free_srxd(gmnal_data_t *nal_data) +{ + gmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL; + + CDEBUG(D_TRACE, "gmnal_free_small rx\n"); + + while(rxd) { + CDEBUG(D_INFO, "Freeing rxd [%p] buffer [%p], size [%d]\n", + rxd, rxd->buffer, rxd->size); + _rxd = rxd; + rxd = rxd->next; + +#if 0 + GMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, _rxd->buffer, + _rxd->size); + GMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_rxd->buffer, GMNAL_SMALL_RXBUFFER_SIZE); +#else + GMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _rxd->buffer); + GMNAL_GM_UNLOCK(nal_data); +#endif + PORTAL_FREE(_rxd, sizeof(gmnal_srxd_t)); + } + return; +} + + +/* + * Get a rxd from the free list + * This get us a wired and gm_registered small rx buffer. + * This implicitly gets us a receive token also. + */ +gmnal_srxd_t * +gmnal_get_srxd(gmnal_data_t *nal_data, int block) +{ + + gmnal_srxd_t *rxd = NULL; + CDEBUG(D_TRACE, "nal_data [%p] block [%d]\n", nal_data, block); + + if (block) { + GMNAL_RXD_GETTOKEN(nal_data); + } else { + if (GMNAL_RXD_TRYGETTOKEN(nal_data)) { + CDEBUG(D_INFO, "gmnal_get_srxd Can't get token\n"); + return(NULL); + } + } + GMNAL_RXD_LOCK(nal_data); + rxd = nal_data->srxd; + if (rxd) + nal_data->srxd = rxd->next; + GMNAL_RXD_UNLOCK(nal_data); + CDEBUG(D_INFO, "got [%p], head is [%p]\n", rxd, nal_data->srxd); + return(rxd); +} + +/* + * Return an rxd to the list + */ +void +gmnal_return_srxd(gmnal_data_t *nal_data, gmnal_srxd_t *rxd) +{ + CDEBUG(D_TRACE, "nal_data [%p], rxd[%p]\n", nal_data, rxd); + + GMNAL_RXD_LOCK(nal_data); + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + GMNAL_RXD_UNLOCK(nal_data); + GMNAL_RXD_RETURNTOKEN(nal_data); + return; +} + +/* + * Given a pointer to a srxd find + * the relevant descriptor for it + * This is done by searching a hash + * list that is created when the srxd's + * are created + */ +gmnal_srxd_t * +gmnal_rxbuffer_to_srxd(gmnal_data_t *nal_data, void *rxbuffer) +{ + gmnal_srxd_t *srxd = NULL; + CDEBUG(D_TRACE, "nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer); + srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer); + CDEBUG(D_INFO, "srxd is [%p]\n", srxd); + return(srxd); +} + + +void +gmnal_stop_rxthread(gmnal_data_t *nal_data) +{ + int delay = 30; + + + + CDEBUG(D_TRACE, "Attempting to stop rxthread nal_data [%p]\n", + nal_data); + + nal_data->rxthread_stop_flag = GMNAL_THREAD_STOP; + + gmnal_remove_rxtwe(nal_data); + /* + * kick the thread + */ + up(&nal_data->rxtwe_wait); + + while(nal_data->rxthread_flag != GMNAL_THREAD_RESET && delay--) { + CDEBUG(D_INFO, "gmnal_stop_rxthread sleeping\n"); + gmnal_yield(1); + up(&nal_data->rxtwe_wait); + } + + if (nal_data->rxthread_flag != GMNAL_THREAD_RESET) { + CDEBUG(D_ERROR, "I don't know how to wake the thread\n"); + } else { + CDEBUG(D_INFO, "rx thread seems to have stopped\n"); + } +} + +void +gmnal_stop_ctthread(gmnal_data_t *nal_data) +{ + int delay = 15; + + + + CDEBUG(D_TRACE, "Attempting to stop ctthread nal_data [%p]\n", + nal_data); + + nal_data->ctthread_flag = GMNAL_THREAD_STOP; + GMNAL_GM_LOCK(nal_data); + gm_set_alarm(nal_data->gm_port, &nal_data->ctthread_alarm, 10, + NULL, NULL); + GMNAL_GM_UNLOCK(nal_data); + + while(nal_data->ctthread_flag == GMNAL_THREAD_STOP && delay--) { + CDEBUG(D_INFO, "gmnal_stop_ctthread sleeping\n"); + gmnal_yield(1); + } + + if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) { + CDEBUG(D_ERROR, "I DON'T KNOW HOW TO WAKE THE THREAD\n"); + } else { + CDEBUG(D_INFO, "CT THREAD SEEMS TO HAVE STOPPED\n"); + } +} + + + +char * 
+gmnal_gm_error(gm_status_t status) +{ + switch(status) { + case(GM_SUCCESS): + return("SUCCESS"); + case(GM_FAILURE): + return("FAILURE"); + case(GM_INPUT_BUFFER_TOO_SMALL): + return("INPUT_BUFFER_TOO_SMALL"); + case(GM_OUTPUT_BUFFER_TOO_SMALL): + return("OUTPUT_BUFFER_TOO_SMALL"); + case(GM_TRY_AGAIN ): + return("TRY_AGAIN"); + case(GM_BUSY): + return("BUSY"); + case(GM_MEMORY_FAULT): + return("MEMORY_FAULT"); + case(GM_INTERRUPTED): + return("INTERRUPTED"); + case(GM_INVALID_PARAMETER): + return("INVALID_PARAMETER"); + case(GM_OUT_OF_MEMORY): + return("OUT_OF_MEMORY"); + case(GM_INVALID_COMMAND): + return("INVALID_COMMAND"); + case(GM_PERMISSION_DENIED): + return("PERMISSION_DENIED"); + case(GM_INTERNAL_ERROR): + return("INTERNAL_ERROR"); + case(GM_UNATTACHED): + return("UNATTACHED"); + case(GM_UNSUPPORTED_DEVICE): + return("UNSUPPORTED_DEVICE"); + case(GM_SEND_TIMED_OUT): + return("GM_SEND_TIMEDOUT"); + case(GM_SEND_REJECTED): + return("GM_SEND_REJECTED"); + case(GM_SEND_TARGET_PORT_CLOSED): + return("GM_SEND_TARGET_PORT_CLOSED"); + case(GM_SEND_TARGET_NODE_UNREACHABLE): + return("GM_SEND_TARGET_NODE_UNREACHABLE"); + case(GM_SEND_DROPPED): + return("GM_SEND_DROPPED"); + case(GM_SEND_PORT_CLOSED): + return("GM_SEND_PORT_CLOSED"); + case(GM_NODE_ID_NOT_YET_SET): + return("GM_NODE_ID_NOT_YET_SET"); + case(GM_STILL_SHUTTING_DOWN): + return("GM_STILL_SHUTTING_DOWN"); + case(GM_CLONE_BUSY): + return("GM_CLONE_BUSY"); + case(GM_NO_SUCH_DEVICE): + return("GM_NO_SUCH_DEVICE"); + case(GM_ABORTED): + return("GM_ABORTED"); + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); + case(GM_UNTRANSLATED_SYSTEM_ERROR): + return("GM_UNTRANSLATED_SYSTEM_ERROR"); + case(GM_ACCESS_DENIED): + return("GM_ACCESS_DENIED"); + + +/* + * These ones are in the docs but aren't in the header file + case(GM_DEV_NOT_FOUND): + return("GM_DEV_NOT_FOUND"); + case(GM_INVALID_PORT_NUMBER): + return("GM_INVALID_PORT_NUMBER"); + case(GM_UC_ERROR): + return("GM_US_ERROR"); + case(GM_PAGE_TABLE_FULL): + return("GM_PAGE_TABLE_FULL"); + case(GM_MINOR_OVERFLOW): + return("GM_MINOR_OVERFLOW"); + case(GM_SEND_ORPHANED): + return("GM_SEND_ORPHANED"); + case(GM_HARDWARE_FAULT): + return("GM_HARDWARE_FAULT"); + case(GM_DATA_CORRUPTED): + return("GM_DATA_CORRUPTED"); + case(GM_TIMED_OUT): + return("GM_TIMED_OUT"); + case(GM_USER_ERROR): + return("GM_USER_ERROR"); + case(GM_NO_MATCH): + return("GM_NOMATCH"); + case(GM_NOT_SUPPORTED_IN_KERNEL): + return("GM_NOT_SUPPORTED_IN_KERNEL"); + case(GM_NOT_SUPPORTED_ON_ARCH): + return("GM_NOT_SUPPORTED_ON_ARCH"); + case(GM_PTE_REF_CNT_OVERFLOW): + return("GM_PTR_REF_CNT_OVERFLOW"); + case(GM_NO_DRIVER_SUPPORT): + return("GM_NO_DRIVER_SUPPORT"); + case(GM_FIRMWARE_NOT_RUNNING): + return("GM_FIRMWARE_NOT_RUNNING"); + + * These ones are in the docs but aren't in the header file + */ + default: + return("UNKNOWN GM ERROR CODE"); + } +} + + +char * +gmnal_rxevent(gm_recv_event_t *ev) +{ + short event; + event = GM_RECV_EVENT_TYPE(ev); + switch(event) { + case(GM_NO_RECV_EVENT): + return("GM_NO_RECV_EVENT"); + case(GM_SENDS_FAILED_EVENT): + return("GM_SEND_FAILED_EVENT"); + case(GM_ALARM_EVENT): + return("GM_ALARM_EVENT"); + case(GM_SENT_EVENT): + return("GM_SENT_EVENT"); + case(_GM_SLEEP_EVENT): + return("_GM_SLEEP_EVENT"); + case(GM_RAW_RECV_EVENT): + return("GM_RAW_RECV_EVENT"); + case(GM_BAD_SEND_DETECTED_EVENT): + return("GM_BAD_SEND_DETECTED_EVENT"); + case(GM_SEND_TOKEN_VIOLATION_EVENT): + return("GM_SEND_TOKEN_VIOLATION_EVENT"); + 
case(GM_RECV_TOKEN_VIOLATION_EVENT): + return("GM_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_BAD_RECV_TOKEN_EVENT): + return("GM_BAD_RECV_TOKEN_EVENT"); + case(GM_ALARM_VIOLATION_EVENT): + return("GM_ALARM_VIOLATION_EVENT"); + case(GM_RECV_EVENT): + return("GM_RECV_EVENT"); + case(GM_HIGH_RECV_EVENT): + return("GM_HIGH_RECV_EVENT"); + case(GM_PEER_RECV_EVENT): + return("GM_PEER_RECV_EVENT"); + case(GM_HIGH_PEER_RECV_EVENT): + return("GM_HIGH_PEER_RECV_EVENT"); + case(GM_FAST_RECV_EVENT): + return("GM_FAST_RECV_EVENT"); + case(GM_FAST_HIGH_RECV_EVENT): + return("GM_FAST_HIGH_RECV_EVENT"); + case(GM_FAST_PEER_RECV_EVENT): + return("GM_FAST_PEER_RECV_EVENT"); + case(GM_FAST_HIGH_PEER_RECV_EVENT): + return("GM_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_REJECTED_SEND_EVENT): + return("GM_REJECTED_SEND_EVENT"); + case(GM_ORPHANED_SEND_EVENT): + return("GM_ORPHANED_SEND_EVENT"); + case(GM_BAD_RESEND_DETECTED_EVENT): + return("GM_BAD_RESEND_DETETED_EVENT"); + case(GM_DROPPED_SEND_EVENT): + return("GM_DROPPED_SEND_EVENT"); + case(GM_BAD_SEND_VMA_EVENT): + return("GM_BAD_SEND_VMA_EVENT"); + case(GM_BAD_RECV_VMA_EVENT): + return("GM_BAD_RECV_VMA_EVENT"); + case(_GM_FLUSHED_ALARM_EVENT): + return("GM_FLUSHED_ALARM_EVENT"); + case(GM_SENT_TOKENS_EVENT): + return("GM_SENT_TOKENS_EVENTS"); + case(GM_IGNORE_RECV_EVENT): + return("GM_IGNORE_RECV_EVENT"); + case(GM_ETHERNET_RECV_EVENT): + return("GM_ETHERNET_RECV_EVENT"); + case(GM_NEW_NO_RECV_EVENT): + return("GM_NEW_NO_RECV_EVENT"); + case(GM_NEW_SENDS_FAILED_EVENT): + return("GM_NEW_SENDS_FAILED_EVENT"); + case(GM_NEW_ALARM_EVENT): + return("GM_NEW_ALARM_EVENT"); + case(GM_NEW_SENT_EVENT): + return("GM_NEW_SENT_EVENT"); + case(_GM_NEW_SLEEP_EVENT): + return("GM_NEW_SLEEP_EVENT"); + case(GM_NEW_RAW_RECV_EVENT): + return("GM_NEW_RAW_RECV_EVENT"); + case(GM_NEW_BAD_SEND_DETECTED_EVENT): + return("GM_NEW_BAD_SEND_DETECTED_EVENT"); + case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): + return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): + return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_BAD_RECV_TOKEN_EVENT): + return("GM_NEW_BAD_RECV_TOKEN_EVENT"); + case(GM_NEW_ALARM_VIOLATION_EVENT): + return("GM_NEW_ALARM_VIOLATION_EVENT"); + case(GM_NEW_RECV_EVENT): + return("GM_NEW_RECV_EVENT"); + case(GM_NEW_HIGH_RECV_EVENT): + return("GM_NEW_HIGH_RECV_EVENT"); + case(GM_NEW_PEER_RECV_EVENT): + return("GM_NEW_PEER_RECV_EVENT"); + case(GM_NEW_HIGH_PEER_RECV_EVENT): + return("GM_NEW_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_FAST_RECV_EVENT): + return("GM_NEW_FAST_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_RECV_EVENT): + return("GM_NEW_FAST_HIGH_RECV_EVENT"); + case(GM_NEW_FAST_PEER_RECV_EVENT): + return("GM_NEW_FAST_PEER_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): + return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_REJECTED_SEND_EVENT): + return("GM_NEW_REJECTED_SEND_EVENT"); + case(GM_NEW_ORPHANED_SEND_EVENT): + return("GM_NEW_ORPHANED_SEND_EVENT"); + case(_GM_NEW_PUT_NOTIFICATION_EVENT): + return("_GM_NEW_PUT_NOTIFICATION_EVENT"); + case(GM_NEW_FREE_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_SEND_TOKEN_EVENT"); + case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); + case(GM_NEW_BAD_RESEND_DETECTED_EVENT): + return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); + case(GM_NEW_DROPPED_SEND_EVENT): + return("GM_NEW_DROPPED_SEND_EVENT"); + case(GM_NEW_BAD_SEND_VMA_EVENT): + return("GM_NEW_BAD_SEND_VMA_EVENT"); + case(GM_NEW_BAD_RECV_VMA_EVENT): + return("GM_NEW_BAD_RECV_VMA_EVENT"); + 
case(_GM_NEW_FLUSHED_ALARM_EVENT): + return("GM_NEW_FLUSHED_ALARM_EVENT"); + case(GM_NEW_SENT_TOKENS_EVENT): + return("GM_NEW_SENT_TOKENS_EVENT"); + case(GM_NEW_IGNORE_RECV_EVENT): + return("GM_NEW_IGNORE_RECV_EVENT"); + case(GM_NEW_ETHERNET_RECV_EVENT): + return("GM_NEW_ETHERNET_RECV_EVENT"); + default: + return("Unknown Recv event"); +#if 0 + case(/* _GM_PUT_NOTIFICATION_EVENT */ + case(/* GM_FREE_SEND_TOKEN_EVENT */ + case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */ +#endif + } +} + + +void +gmnal_yield(int delay) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); +} + +int +gmnal_is_small_msg(gmnal_data_t *nal_data, int niov, struct iovec *iov, + int len) +{ + + CDEBUG(D_TRACE, "len [%d] limit[%d]\n", len, + GMNAL_SMALL_MSG_SIZE(nal_data)); + + if ((len + sizeof(ptl_hdr_t) + sizeof(gmnal_msghdr_t)) + < GMNAL_SMALL_MSG_SIZE(nal_data)) { + + CDEBUG(D_INFO, "Yep, small message\n"); + return(1); + } else { + CDEBUG(D_ERROR, "No, not small message\n"); + /* + * could be made up of lots of little ones ! + */ + return(0); + } + +} + +int +gmnal_add_rxtwe(gmnal_data_t *nal_data, gm_recv_event_t *rxevent) +{ + gmnal_rxtwe_t *we = NULL; + + CDEBUG(D_NET, "adding entry to list\n"); + + PORTAL_ALLOC(we, sizeof(gmnal_rxtwe_t)); + if (!we) { + CDEBUG(D_ERROR, "failed to malloc\n"); + return(GMNAL_STATUS_FAIL); + } + we->rx = rxevent; + + spin_lock(&nal_data->rxtwe_lock); + if (nal_data->rxtwe_tail) { + nal_data->rxtwe_tail->next = we; + } else { + nal_data->rxtwe_head = we; + nal_data->rxtwe_tail = we; + } + nal_data->rxtwe_tail = we; + spin_unlock(&nal_data->rxtwe_lock); + + up(&nal_data->rxtwe_wait); + return(GMNAL_STATUS_OK); +} + +void +gmnal_remove_rxtwe(gmnal_data_t *nal_data) +{ + gmnal_rxtwe_t *_we, *we = nal_data->rxtwe_head; + + CDEBUG(D_NET, "removing all work list entries\n"); + + spin_lock(&nal_data->rxtwe_lock); + CDEBUG(D_NET, "Got lock\n"); + while (we) { + _we = we; + we = we->next; + PORTAL_FREE(_we, sizeof(gmnal_rxtwe_t)); + } + spin_unlock(&nal_data->rxtwe_lock); + nal_data->rxtwe_head = NULL; + nal_data->rxtwe_tail = NULL; +} + +gmnal_rxtwe_t * +gmnal_get_rxtwe(gmnal_data_t *nal_data) +{ + gmnal_rxtwe_t *we = NULL; + + CDEBUG(D_NET, "Getting entry to list\n"); + + do { + down(&nal_data->rxtwe_wait); + if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP) { + /* + * time to stop + * TO DO some one free the work entries + */ + return(NULL); + } + spin_lock(&nal_data->rxtwe_lock); + if (nal_data->rxtwe_head) { + CDEBUG(D_WARNING, "Got a work entry\n"); + we = nal_data->rxtwe_head; + nal_data->rxtwe_head = we->next; + if (!nal_data->rxtwe_head) + nal_data->rxtwe_tail = NULL; + } else { + CDEBUG(D_WARNING, "woken but no work\n"); + } + spin_unlock(&nal_data->rxtwe_lock); + } while (!we); + + CDEBUG(D_WARNING, "Returning we[%p]\n", we); + return(we); +} + + +/* + * Start the caretaker thread and a number of receiver threads + * The caretaker thread gets events from the gm library. + * It passes receive events to the receiver threads via a work list. + * It processes other events itself in gm_unknown. These will be + * callback events or sleeps. + */ +int +gmnal_start_kernel_threads(gmnal_data_t *nal_data) +{ + + int threads = 0; + /* + * the alarm is used to wake the caretaker thread from + * gm_unknown call (sleeping) to exit it. 
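gmnal_add_rxtwe() and gmnal_get_rxtwe() above form a small producer/consumer queue in which the semaphore rxtwe_wait counts pending wake-ups: one per queued event plus the extra up() calls gmnal_stop_rxthread() uses to kick sleeping threads at shutdown. That is why the consumer re-checks the list and loops when it wakes to an empty queue. Reduced to its shape (fragments of the functions above, not new behaviour):

        /* producer (caretaker thread): queue the event, post one wake-up */
        spin_lock(&nal_data->rxtwe_lock);
        /* ... append 'we' to the tail ... */
        spin_unlock(&nal_data->rxtwe_lock);
        up(&nal_data->rxtwe_wait);

        /* consumer (rx thread): one down() per wake-up, which may carry no work */
        do {
                down(&nal_data->rxtwe_wait);
                if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP)
                        return(NULL);           /* shutdown kick, not work */
                spin_lock(&nal_data->rxtwe_lock);
                we = nal_data->rxtwe_head;      /* may be NULL on a spurious kick */
                /* ... unlink 'we' if non-NULL ... */
                spin_unlock(&nal_data->rxtwe_lock);
        } while (we == NULL);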
+ */ + CDEBUG(D_NET, "Initializing caretaker thread alarm and flag\n"); + gm_initialize_alarm(&nal_data->ctthread_alarm); + nal_data->ctthread_flag = GMNAL_THREAD_RESET; + + + CDEBUG(D_INFO, "Starting caretaker thread\n"); + nal_data->ctthread_pid = + kernel_thread(gmnal_ct_thread, (void*)nal_data, 0); + if (nal_data->ctthread_pid <= 0) { + CDEBUG(D_ERROR, "Caretaker thread failed to start\n"); + return(GMNAL_STATUS_FAIL); + } + + while (nal_data->rxthread_flag != GMNAL_THREAD_RESET) { + gmnal_yield(1); + CDEBUG(D_INFO, "Waiting for caretaker thread signs of life\n"); + } + + CDEBUG(D_INFO, "caretaker thread has started\n"); + + + /* + * Now start a number of receiver threads + * these treads get work to do from the caretaker (ct) thread + */ + nal_data->rxthread_flag = GMNAL_THREAD_RESET; + nal_data->rxthread_stop_flag = GMNAL_THREAD_RESET; + + for (threads=0; threadsrxthread_pid[threads] = -1; + spin_lock_init(&nal_data->rxtwe_lock); + spin_lock_init(&nal_data->rxthread_flag_lock); + sema_init(&nal_data->rxtwe_wait, 0); + nal_data->rxtwe_head = NULL; + nal_data->rxtwe_tail = NULL; + /* + * If the default number of receive threades isn't + * modified at load time, then start one thread per cpu + */ + if (num_rx_threads == -1) + num_rx_threads = smp_num_cpus; + CDEBUG(D_INFO, "Starting [%d] receive threads\n", num_rx_threads); + for (threads=0; threadsrxthread_pid[threads] = + kernel_thread(gmnal_rx_thread, (void*)nal_data, 0); + if (nal_data->rxthread_pid[threads] <= 0) { + CDEBUG(D_ERROR, "Receive thread failed to start\n"); + gmnal_stop_rxthread(nal_data); + gmnal_stop_ctthread(nal_data); + return(GMNAL_STATUS_FAIL); + } + } + + for (;;) { + spin_lock(&nal_data->rxthread_flag_lock); + if (nal_data->rxthread_flag == GMNAL_RXTHREADS_STARTED) { + spin_unlock(&nal_data->rxthread_flag_lock); + break; + } + spin_unlock(&nal_data->rxthread_flag_lock); + gmnal_yield(1); + } + + CDEBUG(D_INFO, "receive threads seem to have started\n"); + + return(GMNAL_STATUS_OK); +} + +EXPORT_SYMBOL(gmnal_yield); +EXPORT_SYMBOL(gmnal_alloc_srxd); +EXPORT_SYMBOL(gmnal_get_srxd); +EXPORT_SYMBOL(gmnal_return_srxd); +EXPORT_SYMBOL(gmnal_free_srxd); +EXPORT_SYMBOL(gmnal_alloc_stxd); +EXPORT_SYMBOL(gmnal_get_stxd); +EXPORT_SYMBOL(gmnal_return_stxd); +EXPORT_SYMBOL(gmnal_free_stxd); +EXPORT_SYMBOL(gmnal_rxbuffer_to_srxd); +EXPORT_SYMBOL(gmnal_rxevent); +EXPORT_SYMBOL(gmnal_gm_error); +EXPORT_SYMBOL(gmnal_stop_ctthread); +EXPORT_SYMBOL(gmnal_add_rxtwe); +EXPORT_SYMBOL(gmnal_get_rxtwe); diff --git a/lustre/portals/knals/lgmnal/Makefile.am b/lustre/portals/knals/lgmnal/Makefile.am deleted file mode 100644 index 6794494..0000000 --- a/lustre/portals/knals/lgmnal/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
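Both for loops in gmnal_start_kernel_threads() above appear garbled. From the variables involved they presumably first mark every slot of rxthread_pid[] as unused and then spawn num_rx_threads receivers, roughly as below; the NRXTHREADS bound on the first loop is an assumption about the array size declared in gmnal.h:

        for (threads = 0; threads < NRXTHREADS; threads++)
                nal_data->rxthread_pid[threads] = -1;           /* slot unused */

        /* ... lock, semaphore and num_rx_threads initialisation as above ... */

        for (threads = 0; threads < num_rx_threads; threads++) {
                nal_data->rxthread_pid[threads] =
                        kernel_thread(gmnal_rx_thread, (void*)nal_data, 0);
                if (nal_data->rxthread_pid[threads] <= 0) {
                        CDEBUG(D_ERROR, "Receive thread failed to start\n");
                        gmnal_stop_rxthread(nal_data);
                        gmnal_stop_ctthread(nal_data);
                        return(GMNAL_STATUS_FAIL);
                }
        }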
-# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = lgmnal -modulenet_DATA = lgmnal.o -EXTRA_PROGRAMS = lgmnal - -DEFS = -lgmnal_SOURCES = lgmnal.h lgmnal_api.c lgmnal_cb.c lgmnal_comm.c lgmnal_utils.c lgmnal_module.c diff --git a/lustre/portals/knals/lgmnal/lgmnal.h b/lustre/portals/knals/lgmnal/lgmnal.h deleted file mode 100644 index 8b496ec..0000000 --- a/lustre/portals/knals/lgmnal/lgmnal.h +++ /dev/null @@ -1,463 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Portals GM kernel NAL header file - * This file makes all declaration and prototypes - * for the API side and CB side of the NAL - */ -#ifndef __INCLUDE_LGMNAL_H__ -#define __INCLUDE_LGMNAL_H__ - -#include "linux/config.h" -#include "linux/module.h" -#include "linux/tty.h" -#include "linux/kernel.h" -#include "linux/mm.h" -#include "linux/string.h" -#include "linux/stat.h" -#include "linux/errno.h" -#include "linux/locks.h" -#include "linux/unistd.h" -#include "linux/init.h" -#include "linux/sem.h" -#include "linux/vmalloc.h" -#ifdef MODVERSIONS -#include -#endif - - -#include "portals/nal.h" -#include "portals/api.h" -#include "portals/errno.h" -#include "linux/kp30.h" -#include "portals/p30.h" - -#include "portals/lib-nal.h" -#include "portals/lib-p30.h" - -#define GM_STRONG_TYPES 1 -#include "gm.h" -#include "gm_internal.h" - - -/* - * Defines for the API NAL - */ - - - -/* - * Small message size is configurable - * insmod can set small_msg_size - * which is used to populate nal_data.small_msg_size - */ -#define LGMNAL_SMALL_MESSAGE 1078 -#define LGMNAL_LARGE_MESSAGE_INIT 1079 -#define LGMNAL_LARGE_MESSAGE_ACK 1080 -#define LGMNAL_LARGE_MESSAGE_FINI 1081 - -extern int lgmnal_small_msg_size; -#define LGMNAL_SMALL_MSG_SIZE(a) a->small_msg_size -#define LGMNAL_IS_SMALL_MESSAGE(n,a,b,c) lgmnal_is_small_message(n, a, b, c) -#define LGMNAL_MAGIC 0x1234abcd - -typedef struct _lgmnal_hash { - void *key; - void *data; - struct _lgmnal_hash *next; - } lgmnal_hash_t; - -/* - * Small Transmit Descriptor - * A structre to keep track of a small transmit operation - * This structure has a one-to-one relationship with a small - * transmit buffer (both create by lgmnal_stxd_alloc). - * stxd has pointer to txbuffer and the hash table in nal_data - * allows us to go the other way. 
- */ -typedef struct _lgmnal_stxd_t { - void *buffer; /* Address of small wired buffer this decriptor uses */ - int size; /* size (in bytes) of the tx buffer this descripto uses */ - gm_size_t gmsize; /* gmsize of the tx buffer this descripto uses */ - int type; /* large or small message */ - struct _lgmnal_data_t *nal_data; - lib_msg_t *cookie; /* the cookie the portals library gave us */ - int niov; - struct iovec iov[PTL_MD_MAX_IOV]; - struct _lgmnal_stxd_t *next; -} lgmnal_stxd_t; - -/* - * as for lgmnal_stxd_t - */ -typedef struct _lgmnal_srxd_t { - void *buffer; - int size; - gm_size_t gmsize; - int type; - struct _lgmnal_srxd_t *next; -} lgmnal_srxd_t; - -/* - * Header which lmgnal puts at the start of each message - */ -typedef struct _lgmnal_msghdr { - int magic; - int type; - unsigned int sender_node_id; - lgmnal_stxd_t *stxd; - } lgmnal_msghdr_t; -#define LGMNAL_MSGHDR_SIZE sizeof(lgmnal_msghdr_t) - -/* - * There's one of these for each interface that is initialised - * There's a maximum of LGMNAL_NUM_IF lgmnal_data_t - */ - -typedef struct _lgmnal_data_t { - int refcnt; -#ifdef LGMNAL_API_LOCK_SPIN - spinlock_t api_lock; /* lock provided for api->lock function */ -#else - struct semaphore api_lock; -#endif - spinlock_t cb_lock; /* lock provided for cb_cli function */ - char _cb_file[128]; - char _cb_function[128]; - int _cb_line; - spinlock_t stxd_lock; /* lock to add or remove stxd to/from free list */ - struct semaphore stxd_token; /* Don't try to access the list until get a token */ - lgmnal_stxd_t *stxd; /* list of free stxd's */ -#ifdef LGMNAL_USE_GM_HASH - struct gm_hash *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ -#else - lgmnal_hash_t *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ -#endif - spinlock_t srxd_lock; - struct semaphore srxd_token; - lgmnal_srxd_t *srxd; -#ifdef LGMNAL_USE_GM_HASH - struct gm_hash *srxd_hash; -#else - lgmnal_hash_t *srxd_hash; -#endif - nal_t *nal; /* our API NAL */ - nal_cb_t *nal_cb; /* our CB nal */ - struct gm_port *gm_port; /* the gm port structure we open in lgmnal_init */ - unsigned int gm_local_nid; /* our gm local node id */ - unsigned int gm_global_nid; /* our gm global node id */ - spinlock_t gm_lock; /* GM is not threadsage */ - long rxthread_pid; /* thread id of our receiver thread */ - int rxthread_flag; /* stop the thread flag */ - gm_alarm_t rxthread_alarm; /* used to wake sleeping rx thread */ - int small_msg_size; - int small_msg_gmsize; - char _file[128]; - char _function[128]; - int _line; -} lgmnal_data_t; - -/* - * For nal_data->rxthread_flag - */ -#define LGMNAL_THREAD_START 444 -#define LGMNAL_THREAD_STARTED 333 -#define LGMNAL_THREAD_CONTINUE 777 -#define LGMNAL_THREAD_STOP 666 -#define LGMNAL_THREAD_STOPPED 555 - -#define LGMNAL_NUM_IF 1 - -#if 0 -/* - * A global structre to maintain 1 nal_data structure for each - * myrinet card that the user initialises (only tested for 1) - * To add or remove any nal_data structures from the ifs arrary the - * init_lock must be acquired. 
This is the only time this lock is acquired - */ -typedef struct _lgmnal_global_t { - int debug_level; - struct semaphore init_lock; - lgmnal_data_t *ifs[LGMNAL_NUM_IF]; -} lgmnal_global_t; - -extern lgmnal_data_t global_nal_data; -#define LGMNAL_DEBUG_LEVEL lgmnal_global.debug_level -#else -extern lgmnal_data_t *global_nal_data; -extern int lgmnal_debug_level; -#define LGMNAL_DEBUG_LEVEL lgmnal_debug_level -#endif - -/* - * The gm_port to use for lgmnal - */ -#define LGMNAL_GM_PORT 4 - -/* - * for ioctl get pid - */ -#define LGMNAL_IOC_GET_GNID 1 - -/* - * LGMNAL_DEBUG_LEVEL set by module load 0= level) lgmnal_print args -#else -#define LGMNAL_PRINT(level, args) -#endif - -#define LGMNAL_DEBUG_ERR 1 /* only report errors */ -#define LGMNAL_DEBUG_TRACE 2 /* on entering function */ -#define LGMNAL_DEBUG_V 3 /* debug */ -#define LGMNAL_DEBUG_VV 4 /* more debug */ - -/* - * Return codes - */ -#define LGMNAL_STATUS_OK 0 -#define LGMNAL_STATUS_FAIL 1 -#define LGMNAL_STATUS_NOMEM 2 - - -/* - * FUNCTION PROTOTYPES - */ - -/* - * Locking macros - */ - -/* - * To access the global structure - * to add or remove interface (lgmnal_init) or shutdown only - */ -#define LGMNAL_GLOBAL_LOCK_INIT sema_init(&(lgmnal_global.init_lock), 1) -#define LGMNAL_GLOBAL_LOCK do { \ - LGMNAL_PRINT(1, ("Acquiring global mutex\n")); \ - down(&(lgmnal_global.init_lock)); \ - LGMNAL_PRINT(1, ("Got global lock\n")); \ - } while (0) -#define LGMNAL_GLOBAL_UNLOCK do { \ - LGMNAL_PRINT(1, ("Releasing global mutex\n")); \ - up(&(lgmnal_global.init_lock)); \ - LGMNAL_PRINT(1, ("Release global mutex\n")); \ - } while (0) - -/* - * For the API lock function - */ -#ifdef LGMNAL_API_LOCK_SPIN -#define LGMNAL_API_LOCK_INIT(a) spin_lock_init(&a->api_lock) -#define LGMNAL_API_LOCK(a) spin_lock(&a->api_lock) -#define LGMNAL_API_UNLOCK(a) spin_unlock(&a->api_lock) -#else -#define LGMNAL_API_LOCK_INIT(a) sema_init(&a->api_lock, 1) -#define LGMNAL_API_LOCK(a) down(&a->api_lock) -#define LGMNAL_API_UNLOCK(a) up(&a->api_lock) -#endif - -/* - * For the Small tx and rx descriptor lists - */ -#define LGMNAL_TXD_LOCK_INIT(a) spin_lock_init(&a->stxd_lock); -#define LGMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); -#define LGMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); -#define LGMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); -#define LGMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); -#define LGMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) -#define LGMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); - - -#define LGMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); -#define LGMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); -#define LGMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); -#define LGMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); -#define LGMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); -#define LGMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) -#define LGMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); - -#define LGMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); -#define LGMNAL_GM_LOCK(a) do { \ - while (!spin_trylock(&a->gm_lock)) { \ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("waiting %s:%s:%d holder %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__, nal_data->_function, nal_data->_file, nal_data->_line)); \ - lgmnal_yield(128); \ - } \ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Locked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ - sprintf(nal_data->_function, "%s", __FUNCTION__); \ - sprintf(nal_data->_file, "%s", __FILE__); \ - nal_data->_line = __LINE__; \ - } while (0) -#define LGMNAL_GM_UNLOCK(a) do { \ - spin_unlock(&a->gm_lock); \ - 
memset(nal_data->_function, 0, 128); \ - memset(nal_data->_file, 0, 128); \ - nal_data->_line = 0; \ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Unlocked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ - } while(0); - -#define LGMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); - - -/* - * API NAL - */ -int lgmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); - -int lgmnal_api_shutdown(nal_t *, int); - -int lgmnal_api_validate(nal_t *, void *, size_t); - -void lgmnal_api_yield(nal_t *); - -void lgmnal_api_lock(nal_t *, unsigned long *); - -void lgmnal_api_unlock(nal_t *, unsigned long *); - - -#define LGMNAL_INIT_NAL(a) do { \ - a->forward = lgmnal_api_forward; \ - a->shutdown = lgmnal_api_shutdown; \ - a->validate = NULL; \ - a->yield = lgmnal_api_yield; \ - a->lock = lgmnal_api_lock; \ - a->unlock = lgmnal_api_unlock; \ - a->timeout = NULL; \ - a->refct = 1; \ - a->nal_data = NULL; \ - } while (0) - - -/* - * CB NAL - */ - -int lgmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); - -int lgmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, - int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); - -int lgmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, - unsigned int, struct iovec *, size_t, size_t); - -int lgmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, - unsigned int, ptl_kiov_t *, size_t, size_t); - -int lgmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); - -int lgmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); - -int lgmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); - -void *lgmnal_cb_malloc(nal_cb_t *, size_t); - -void lgmnal_cb_free(nal_cb_t *, void *, size_t); - -void lgmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); - -int lgmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); - -void lgmnal_cb_printf(nal_cb_t *, const char *fmt, ...); - -void lgmnal_cb_cli(nal_cb_t *, unsigned long *); - -void lgmnal_cb_sti(nal_cb_t *, unsigned long *); - -int lgmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); - -nal_t *lgmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); - -void lgmnal_fini(void); - - - -#define LGMNAL_INIT_NAL_CB(a) do { \ - a->cb_send = lgmnal_cb_send; \ - a->cb_send_pages = lgmnal_cb_send_pages; \ - a->cb_recv = lgmnal_cb_recv; \ - a->cb_recv_pages = lgmnal_cb_recv_pages; \ - a->cb_read = lgmnal_cb_read; \ - a->cb_write = lgmnal_cb_write; \ - a->cb_callback = lgmnal_cb_callback; \ - a->cb_malloc = lgmnal_cb_malloc; \ - a->cb_free = lgmnal_cb_free; \ - a->cb_map = NULL; \ - a->cb_unmap = NULL; \ - a->cb_printf = lgmnal_cb_printf; \ - a->cb_cli = lgmnal_cb_cli; \ - a->cb_sti = lgmnal_cb_sti; \ - a->cb_dist = lgmnal_cb_dist; \ - a->nal_data = NULL; \ - } while (0) - -/* - * lgmnal utilities - */ - -void lgmnal_print(const char *, ...); - -/* - * Small Transmit and Receive Descriptor Functions - */ -int lgmnal_alloc_stxd(lgmnal_data_t *); -void lgmnal_free_stxd(lgmnal_data_t *); -lgmnal_stxd_t* lgmnal_get_stxd(lgmnal_data_t *, int); -void lgmnal_return_stxd(lgmnal_data_t *, lgmnal_stxd_t *); - -int lgmnal_alloc_srxd(lgmnal_data_t *); -void lgmnal_free_srxd(lgmnal_data_t *); -lgmnal_srxd_t* lgmnal_get_srxd(lgmnal_data_t *, int); -void lgmnal_return_srxd(lgmnal_data_t *, lgmnal_srxd_t *); - -/* - * general utility functions - */ -lgmnal_srxd_t *lgmnal_rxbuffer_to_srxd(lgmnal_data_t *, void*); -lgmnal_stxd_t *lgmnal_txbuffer_to_stxd(lgmnal_data_t *, void*); -void 
lgmnal_stop_rxthread(lgmnal_data_t *); -void lgmnal_small_tx_done(gm_port_t *, void *, gm_status_t); -char *lgmnal_gm_error(gm_status_t); -char *lgmnal_rxevent(gm_recv_event_t*); -int lgmnal_is_small_message(lgmnal_data_t*, int, struct iovec*, int); - -void *lgmnal_hash_find(lgmnal_hash_t *, void*); -int lgmnal_hash_add(lgmnal_hash_t**, void*, void*); -void lgmnal_hash_free(lgmnal_hash_t**); - -/* - * Communication functions - */ -int lgmnal_receive_thread(void *); -int -lgmnal_small_transmit(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec*, int); - -int -lgmnal_small_receive2(nal_cb_t *, void *, lib_msg_t *, unsigned int, struct iovec *, size_t, size_t); - -void lgmnal_yield(int); - -#endif /*__INCLUDE_LGMNAL_H__*/ diff --git a/lustre/portals/knals/lgmnal/lgmnal_api.c b/lustre/portals/knals/lgmnal/lgmnal_api.c deleted file mode 100644 index 8322e83..0000000 --- a/lustre/portals/knals/lgmnal/lgmnal_api.c +++ /dev/null @@ -1,527 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Implements the API NAL functions - */ - -#include "lgmnal.h" - -lgmnal_data_t *global_nal_data = NULL; -/* - * lgmnal_api_forward - * This function takes a pack block of arguments from the NAL API - * module and passes them to the NAL CB module. The CB module unpacks - * the args and calls the appropriate function indicated by index. - * Typically this function is used to pass args between kernel and use - * space. 
- * As lgmanl exists entirely in kernel, just pass the arg block directly to - * the NAL CB, buy passing the args to lib_dispatch - * Arguments are - * nal_t nal Our nal - * int index the api function that initiated this call - * void *args packed block of function args - * size_t arg_len length of args block - * void *ret A return value for the API NAL - * size_t ret_len Size of the return value - * - */ - -int -lgmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, - void *ret, size_t ret_len) -{ - - nal_cb_t *nal_cb = NULL; - lgmnal_data_t *nal_data = NULL; - - - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_forward: nal [%p], index [%d], args [%p], arglen [%d], ret [%p], retlen [%d]\n", nal, index, args, arg_len, ret, ret_len)); - - if (!nal || !args || (index < 0) || (arg_len < 0)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); -#ifdef LGMNAL_DEBUG - if (!nal) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No nal specified\n")); - if (!args) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No args specified\n")); - if (index < 0) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Index is negative[%d]\n", index)); - if (arg_len < 0) - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("arg_len is negative [%d]\n", arg_len)); -#endif - return (PTL_FAIL); - } - - if (ret && (ret_len <= 0)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); -#ifdef LGMNAL_DEBUG - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("ret_len is [%d]\n", ret_len)); -#endif - return (PTL_FAIL); - } - - - if (!nal->nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal, no nal data\n")); - return (PTL_FAIL); - } - - nal_data = nal->nal_data; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_data is [%p]\n", nal_data)); - - if (!nal_data->nal_cb) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal_data, no nal_cb\n")); - return (PTL_FAIL); - } - - nal_cb = nal_data->nal_cb; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_cb is [%p]\n", nal_cb)); - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward calling lib_dispatch\n")); - lib_dispatch(nal_cb, NULL, index, args, ret); - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward returns from lib_dispatch\n")); - - return(PTL_OK); -} - - -/* - * lgmnal_api_shutdown - * Close down this interface and free any resources associated with it - * nal_t nal our nal to shutdown - */ -int -lgmnal_api_shutdown(nal_t *nal, int interface) -{ - - lgmnal_data_t *nal_data = nal->nal_data; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_shutdown: nal_data [%p]\n", nal_data)); - - /* - * TO DO lgmnal_api_shutdown what is to be done? 
- */ - - return(PTL_OK); -} - - -/* - * lgmnal_api_validate - * validate a user address for use in communications - * There's nothing to be done here - */ -int -lgmnal_api_validate(nal_t *nal, void *base, size_t extent) -{ - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_validate : nal [%p], base [%p], extent [%d]\n", nal, base, extent)); - - return(PTL_OK); -} - - - -/* - * lgmnal_api_yield - * Give up the processor - */ -void -lgmnal_api_yield(nal_t *nal) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_yield : nal [%p]\n", nal)); - - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - - return; -} - - - -/* - * lgmnal_api_lock - * Take a threadsafe lock - */ -void -lgmnal_api_lock(nal_t *nal, unsigned long *flags) -{ - - lgmnal_data_t *nal_data; - nal_cb_t *nal_cb; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flagsa [%p] flags[%ul]\n", nal, flags, *flags)); - - nal_data = nal->nal_data; - nal_cb = nal_data->nal_cb; - - nal_cb->cb_cli(nal_cb, flags); -/* - LGMNAL_API_LOCK(nal_data); -*/ - - return; -} - -/* - * lgmnal_api_unlock - * Release a threadsafe lock - */ -void -lgmnal_api_unlock(nal_t *nal, unsigned long *flags) -{ - lgmnal_data_t *nal_data; - nal_cb_t *nal_cb; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flags [%p]\n", nal, flags)); - - nal_data = nal->nal_data; - if (!nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal, no nal_data\n")); - } - nal_cb = nal_data->nal_cb; - if (!nal_cb) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal_data, no nal_cb\n")); - } - - nal_cb->cb_sti(nal_cb, flags); -/* - LGMNAL_API_UNLOCK(nal_data); -*/ - - return; -} - - -nal_t * -lgmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t rpid) -{ - - nal_t *nal = NULL; - nal_cb_t *nal_cb = NULL; - lgmnal_data_t *nal_data = NULL; - lgmnal_srxd_t *srxd = NULL; - gm_status_t gm_status; - unsigned int local_nid = 0, global_nid = 0; - ptl_nid_t portals_nid; - ptl_pid_t portals_pid = 0; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_init : interface [%d], ptl_size [%d], ac_size[%d]\n", - interface, ptl_size, ac_size)); - - if ((interface < 0) || (interface > LGMNAL_NUM_IF) || (ptl_size <= 0) || (ac_size <= 0) ) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad args\n")); - return(NULL); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("parameters check out ok\n")); - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Acquired global lock\n")); - - - PORTAL_ALLOC(nal_data, sizeof(lgmnal_data_t)); - if (!nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't get memory\n")); - return(NULL); - } - memset(nal_data, 0, sizeof(lgmnal_data_t)); - /* - * set the small message buffer size - */ - nal_data->refcnt = 1; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_data[%p]\n", nal_data)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("small_msg_size is [%d]\n", nal_data->small_msg_size)); - - PORTAL_ALLOC(nal, sizeof(nal_t)); - if (!nal) { - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - return(NULL); - } - memset(nal, 0, sizeof(nal_t)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal[%p]\n", nal)); - - PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); - if (!nal_cb) { - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - return(NULL); - } - memset(nal_cb, 0, sizeof(nal_cb_t)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_cb[%p]\n", nal_cb)); - - LGMNAL_INIT_NAL(nal); - LGMNAL_INIT_NAL_CB(nal_cb); - /* - * String them all together - */ - nal->nal_data = (void*)nal_data; - nal_cb->nal_data = 
(void*)nal_data; - nal_data->nal = nal; - nal_data->nal_cb = nal_cb; - - LGMNAL_API_LOCK_INIT(nal_data); - LGMNAL_CB_LOCK_INIT(nal_data); - LGMNAL_GM_LOCK_INIT(nal_data); - - - /* - * initialise the interface, - */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling gm_init\n")); - if (gm_init() != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("call to gm_init failed\n")); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_open with interface [%d], port [%d], name [%s], version [%d]\n", interface, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION)); - - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_open(&nal_data->gm_port, 0, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION); - LGMNAL_GM_UNLOCK(nal_data); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open returned [%d]\n", gm_status)); - if (gm_status == GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open succeeded port[%p]\n", nal_data->gm_port)); - } else { - switch(gm_status) { - case(GM_INVALID_PARAMETER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Invalid Parameter\n")); - break; - case(GM_BUSY): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. GM Busy\n")); - break; - case(GM_NO_SUCH_DEVICE): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. No such device\n")); - break; - case(GM_INCOMPATIBLE_LIB_AND_DRIVER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Incompatile lib and driver\n")); - break; - case(GM_OUT_OF_MEMORY): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Out of Memory\n")); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Unknow error code [%d]\n", gm_status)); - break; - } - LGMNAL_GM_LOCK(nal_data); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - - nal_data->small_msg_size = lgmnal_small_msg_size; - nal_data->small_msg_gmsize = gm_min_size_for_length(lgmnal_small_msg_size); - - if (lgmnal_alloc_srxd(nal_data) != LGMNAL_STATUS_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small rx descriptors\n")); - lgmnal_free_stxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - - /* - * Hang out a bunch of small receive buffers - * In fact hang them all out - */ - while((srxd = lgmnal_get_srxd(nal_data, 0))) { - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("giving [%p] to gm_provide_recvive_buffer\n", srxd->buffer)); - LGMNAL_GM_LOCK(nal_data); - gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, - srxd->gmsize, GM_LOW_PRIORITY, 0); - LGMNAL_GM_UNLOCK(nal_data); - } - - /* - * Allocate pools of small tx buffers and descriptors - */ - if (lgmnal_alloc_stxd(nal_data) != LGMNAL_STATUS_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small tx descriptors\n")); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - - /* - * Start the recieve thread - * Initialise the gm_alarm we will use to wake the thread is - * it needs to be stopped - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Initializing receive thread alarm 
and flag\n")); - gm_initialize_alarm(&nal_data->rxthread_alarm); - nal_data->rxthread_flag = LGMNAL_THREAD_START; - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Starting receive thread\n")); - nal_data->rxthread_pid = kernel_thread(lgmnal_receive_thread, (void*)nal_data, 0); - if (nal_data->rxthread_pid <= 0) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Receive thread failed to start\n")); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - while (nal_data->rxthread_flag != LGMNAL_THREAD_STARTED) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1024); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Waiting for receive thread signs of life\n")); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("receive thread seems to have started\n")); - nal_data->rxthread_flag = LGMNAL_THREAD_CONTINUE; - - - - /* - * Initialise the portals library - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Getting node id\n")); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - lgmnal_stop_rxthread(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't determine node id\n")); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - nal_data->gm_local_nid = local_nid; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%u]\n", local_nid)); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, &global_nid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to obtain global id\n")); - lgmnal_stop_rxthread(nal_data); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node id is [%u][%x]\n", global_nid)); - nal_data->gm_global_nid = global_nid; - -/* - pid = gm_getpid(); -*/ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_pid is [%u]\n", portals_pid)); - portals_nid = (unsigned long)global_nid; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_nid is [%lu]\n", portals_nid)); - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_init\n")); - if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, ac_size) != PTL_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lib_init failed\n")); - lgmnal_stop_rxthread(nal_data); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); - return(NULL); - - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_init finished\n")); - global_nal_data = nal->nal_data; - return(nal); -} - - - -/* - * Called when module removed - */ -void lgmnal_fini() -{ - lgmnal_data_t *nal_data = global_nal_data; - nal_t *nal = nal_data->nal; - nal_cb_t 
*nal_cb = nal_data->nal_cb; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_fini\n")); - - PtlNIFini(lgmnal_ni); - lib_fini(nal_cb); - - lgmnal_stop_rxthread(nal_data); - lgmnal_free_stxd(nal_data); - lgmnal_free_srxd(nal_data); - LGMNAL_GM_LOCK(nal_data); - gm_close(nal_data->gm_port); - gm_finalize(); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(nal, sizeof(nal_t)); - PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); - PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); -} - -EXPORT_SYMBOL(lgmnal_init); -EXPORT_SYMBOL(lgmnal_fini); -EXPORT_SYMBOL(lgmnal_api_forward); -EXPORT_SYMBOL(lgmnal_api_validate); -EXPORT_SYMBOL(lgmnal_api_yield); -EXPORT_SYMBOL(lgmnal_api_lock); -EXPORT_SYMBOL(lgmnal_api_unlock); -EXPORT_SYMBOL(lgmnal_api_shutdown); diff --git a/lustre/portals/knals/lgmnal/lgmnal_cb.c b/lustre/portals/knals/lgmnal/lgmnal_cb.c deleted file mode 100644 index dcd5446..0000000 --- a/lustre/portals/knals/lgmnal/lgmnal_cb.c +++ /dev/null @@ -1,258 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -/* - * This file implements the nal cb functions - */ - - -#include "lgmnal.h" - -int lgmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -{ - lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; - int status = PTL_OK; - lgmnal_data_t *nal_data = nal_cb->nal_data; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv nal_cb [%p],private[%p], cookie[%p], niov[%d], iov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, niov, iov, mlen, rlen)); - - if (srxd->type == LGMNAL_SMALL_MESSAGE) { - if (!LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, mlen)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_cb_recv. 
This is not a small message\n")); - } - status = lgmnal_small_receive2(nal_cb, private, cookie, niov, iov, mlen, rlen); - } - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_return status [%d]\n", status)); - return(status); -} - -int lgmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, size_t rlen) -{ - lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; - int status = PTL_OK; - struct iovec *iovec = NULL; - int i = 0; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv_pages nal_cb [%p],private[%p], cookie[%p], kniov[%d], kiov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, kniov, kiov, mlen, rlen)); - - if (srxd->type == LGMNAL_SMALL_MESSAGE) { - PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov); - if (!iovec) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't malloc\n")); - return(LGMNAL_STATUS_FAIL); - } - - /* - * map each page and create an iovec for it - */ - for (i=0; ikiov_page, kiov->kiov_len, kiov->kiov_offset)); - iovec->iov_len = kiov->kiov_len; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kmap", kiov->kiov_page)); - iovec->iov_base = kmap(kiov->kiov_page) + kiov->kiov_offset; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling iov_base is [%p]", iovec->iov_base)); - iovec->iov_len = kiov->kiov_len; - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("calling lgmnal_small_receive2\n")); - status = lgmnal_small_receive2(nal_cb, private, cookie, kniov, iovec, mlen, rlen); - PORTAL_FREE(iovec, sizeof(struct iovec)*kniov); - } - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_return status [%d]\n", status)); - return(status); -} - - -int lgmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, - int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t len) -{ - - lgmnal_data_t *nal_data; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sendnid [%lu] niov[%d] len[%d]\n", nid, niov, len)); - nal_data = nal_cb->nal_data; - - if (LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n")); - lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send it is not supported yet\n")); -/* - lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); -*/ - return(LGMNAL_STATUS_FAIL); - } - return(PTL_OK); -} - -int lgmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, - int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len) -{ - - int i = 0; - lgmnal_data_t *nal_data; - struct iovec *iovec; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_send_pages nid [%lu] niov[%d] len[%d]\n", nid, kniov, len)); - nal_data = nal_cb->nal_data; - if (LGMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) { - /* TO DO fix small message for send pages */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n")); - PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec)); - - for (i=0; ikiov_page, kiov->kiov_len, kiov->kiov_offset)); - iovec->iov_len = kiov->kiov_len; - iovec->iov_base = kmap(kiov->kiov_page) + kiov->kiov_offset; - iovec->iov_len = kiov->kiov_len; - } - lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, kniov, iovec, len); - PORTAL_FREE(iovec, kniov*sizeof(struct iovec)); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send it is not supported yet\n")); -/* - lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, 
pid, niov, iov, len); -*/ - return(LGMNAL_STATUS_FAIL); - } - return(PTL_OK); -} - -int lgmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, user_ptr src, size_t len) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_read dst [%p] src [%p] len[%d]\n", dst, src, len)); - gm_bcopy(src, dst, len); - return(PTL_OK); -} - -int lgmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, void *src, size_t len) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_write :: dst [%p] src [%p] len[%d]\n", dst, src, len)); - gm_bcopy(src, dst, len); - return(PTL_OK); -} - -int lgmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_callback nal_cb[%p], private[%p], eq[%p], ev[%p]\n", nal_cb, private, eq, ev)); - - if (eq->event_callback != NULL) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("found callback\n")); - eq->event_callback(ev); - } - - return(PTL_OK); -} - -void *lgmnal_cb_malloc(nal_cb_t *nal_cb, size_t len) -{ - void *ptr = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_malloc len[%d]\n", len)); - PORTAL_ALLOC(ptr, len); - return(ptr); -} - -void lgmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_free :: buf[%p] len[%d]\n", buf, len)); - PORTAL_FREE(buf, len); - return; -} - -void lgmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void **addrkey) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_unmap niov[%d] iov[%], addrkey[%p]\n", niov, iov, addrkey)); - return; -} - -int lgmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void**addrkey) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_map niov[%d], iov[%p], addrkey[%p], niov, iov, addrkey\n")); - return(PTL_OK); -} - -void lgmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...) 
-{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_printf\n")); - lgmnal_print(fmt); - return; -} - -void lgmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) -{ - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - spinlock_t cb_lock = nal_data->cb_lock; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_cli\n")); -/* - local_irq_save(*flags); - spin_lock_irqsave(&cb_lock, *flags); -*/ - spin_lock(&cb_lock); - return; -} - -void lgmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) -{ - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - spinlock_t cb_lock = nal_data->cb_lock; - -/* - local_irq_restore(*flags); - spin_unlock_irqrestore(&cb_lock, *flags); -*/ - spin_unlock(&cb_lock); - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sti\n")); - return; -} - -int lgmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_dist\n")); - if (dist) - *dist = 27; - return(PTL_OK); -} - - - - -EXPORT_SYMBOL(lgmnal_cb_send); -EXPORT_SYMBOL(lgmnal_cb_send_pages); -EXPORT_SYMBOL(lgmnal_cb_recv); -EXPORT_SYMBOL(lgmnal_cb_recv_pages); -EXPORT_SYMBOL(lgmnal_cb_read); -EXPORT_SYMBOL(lgmnal_cb_write); -EXPORT_SYMBOL(lgmnal_cb_cli); -EXPORT_SYMBOL(lgmnal_cb_sti); -EXPORT_SYMBOL(lgmnal_cb_dist); -EXPORT_SYMBOL(lgmnal_cb_printf); -EXPORT_SYMBOL(lgmnal_cb_map); -EXPORT_SYMBOL(lgmnal_cb_unmap); -EXPORT_SYMBOL(lgmnal_cb_callback); -EXPORT_SYMBOL(lgmnal_cb_free); -EXPORT_SYMBOL(lgmnal_cb_malloc); diff --git a/lustre/portals/knals/lgmnal/lgmnal_comm.c b/lustre/portals/knals/lgmnal/lgmnal_comm.c deleted file mode 100644 index 4cd1b83..0000000 --- a/lustre/portals/knals/lgmnal/lgmnal_comm.c +++ /dev/null @@ -1,477 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - - -/* - * This file contains all lgmnal send and receive functions - */ - -#include "lgmnal.h" - -int -lgmnal_requeue_rxbuffer(lgmnal_data_t *nal_data, lgmnal_srxd_t *srxd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_requeue_rxbuffer\n")); - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data)); - - LGMNAL_GM_LOCK(nal_data); - gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, - srxd->gmsize, GM_LOW_PRIORITY, 0 ); - LGMNAL_GM_UNLOCK(nal_data); - - return(LGMNAL_STATUS_OK); -} - - -/* - * Handle a bad message - * A bad message is one we don't expect or can't interpret - */ -int -lgmnal_badrx_message(lgmnal_data_t *nal_data, gm_recv_t *recv, lgmnal_srxd_t *srxd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Can't handle message\n")); - - if (!srxd) - srxd = lgmnal_rxbuffer_to_srxd(nal_data, gm_ntohp(recv->buffer)); - if (srxd) { - lgmnal_requeue_rxbuffer(nal_data, srxd); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't find a descriptor for this buffer\n")); - /* - * get rid of it ? - */ - return(LGMNAL_STATUS_FAIL); - } - - return(LGMNAL_STATUS_OK); -} - - -/* - * Start processing a small message receive - * Get here from lgmnal_receive_thread - * Hand off to lib_parse, which calls cb_recv - * which hands back to lgmnal_small_receive2 - * Deal with all endian stuff here (if we can!) - */ -int -lgmnal_small_receive1(lgmnal_data_t *nal_data, gm_recv_t *recv) -{ - lgmnal_srxd_t *srxd = NULL; - void *buffer = NULL; - unsigned int snode, sport, type, length; - lgmnal_msghdr_t *lgmnal_msghdr; - ptl_hdr_t *portals_hdr; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive1 nal_data [%p], recv [%p]\n", nal_data, recv)); - - buffer = gm_ntohp(recv->buffer);; - snode = (int)gm_ntoh_u16(recv->sender_node_id); - sport = (int)gm_ntoh_u8(recv->sender_port_id); - type = (int)gm_ntoh_u8(recv->type); - buffer = gm_ntohp(recv->buffer); - length = (int) gm_ntohl(recv->length); - - lgmnal_msghdr = (lgmnal_msghdr_t*)buffer; - portals_hdr = (ptl_hdr_t*)(buffer+LGMNAL_MSGHDR_SIZE); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("rx_event:: Sender node [%d], Sender Port [%d], type [%d], length [%d], buffer [%p]\n", - snode, sport, type, length, buffer)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_msghdr:: Sender node [%u], magic [%lx], type [%d]\n", - lgmnal_msghdr->sender_node_id, lgmnal_msghdr->magic, lgmnal_msghdr->type)); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: Sender node [%ul], dest_node [%ul]\n", - portals_hdr->src_nid, portals_hdr->dest_nid)); - - - /* - * Get a transmit descriptor for this message - */ - srxd = lgmnal_rxbuffer_to_srxd(nal_data, buffer); - LGMNAL_PRINT(LGMNAL_DEBUG, ("Back from lgmnal_rxbuffer_to_srxd\n")); - if (!srxd) { - LGMNAL_PRINT(LGMNAL_DEBUG, ("Failed to get receive descriptor for this buffer\n")); - lib_parse(nal_data->nal_cb, portals_hdr, srxd); - return(LGMNAL_STATUS_FAIL); - } - srxd->type = LGMNAL_SMALL_MESSAGE; - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling lib_parse buffer is [%p]\n", buffer+LGMNAL_MSGHDR_SIZE)); - /* - * control passes to lib, which calls cb_recv - * cb_recv is responsible for returning the buffer - * for future receive - */ - lib_parse(nal_data->nal_cb, portals_hdr, srxd); - - return(LGMNAL_STATUS_OK); -} - -/* - * Get here from lgmnal_receive_thread, lgmnal_small_receive1 - * lib_parse, cb_recv - * Put data from prewired receive buffer into users buffer(s) - * Hang out the receive buffer again for another receive - * Call lib_finalize - */ -int -lgmnal_small_receive2(nal_cb_t *nal_cb, void *private, 
lib_msg_t *cookie, unsigned int niov, - struct iovec *iov, size_t mlen, size_t rlen) -{ - lgmnal_srxd_t *srxd = NULL; - void *buffer = NULL; - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive2 niov [%d] mlen[%d]\n", niov, mlen)); - - if (!private) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_small_receive2 no context\n")); - lib_finalize(nal_cb, private, cookie); - return(PTL_FAIL); - } - - srxd = (lgmnal_srxd_t*)private; - buffer = srxd->buffer; - buffer += sizeof(lgmnal_msghdr_t); - buffer += sizeof(ptl_hdr_t); - - while(niov--) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing [%p] len [%d]\n", iov, iov->iov_len)); - gm_bcopy(buffer, iov->iov_base, iov->iov_len); - buffer += iov->iov_len; - iov++; - } - - - /* - * let portals library know receive is complete - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_finalize\n")); - if (lib_finalize(nal_cb, private, cookie) != PTL_OK) { - /* TO DO what to do with failed lib_finalise? */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lib_finalize failed\n")); - } - /* - * return buffer so it can be used again - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling gm_provide_receive_buffer\n")); - LGMNAL_GM_LOCK(nal_data); - gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, srxd->gmsize, GM_LOW_PRIORITY, 0); - LGMNAL_GM_UNLOCK(nal_data); - - return(PTL_OK); -} - - - -/* - * The recevive thread - * This guy wait in gm_blocking_recvive and gets - * woken up when the myrinet adaptor gets an interrupt. - * Hands off processing of small messages and blocks again - */ -int -lgmnal_receive_thread(void *arg) -{ - lgmnal_data_t *nal_data; - gm_recv_event_t *rxevent = NULL; - gm_recv_t *recv = NULL; - void *buffer; - - if (!arg) { - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread. NO nal_data. 
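A minimal sketch of the receive-buffer layout that lgmnal_small_receive1() and lgmnal_small_receive2() above both assume; the helper below is hypothetical (the real code does this pointer arithmetic inline) and it takes LGMNAL_MSGHDR_SIZE to equal sizeof(lgmnal_msghdr_t), which is how the two functions use them interchangeably.

#include "lgmnal.h"

/* Hypothetical helper: locate the NAL header, the portals header and the
 * payload inside one small-message receive buffer handed back by GM. */
static void
sketch_small_rx_layout(lgmnal_srxd_t *srxd, lgmnal_msghdr_t **msghdr,
                       ptl_hdr_t **portals_hdr, char **payload)
{
        char *buffer = srxd->buffer;

        *msghdr      = (lgmnal_msghdr_t *)buffer;                  /* rx thread checks ->type here */
        *portals_hdr = (ptl_hdr_t *)(buffer + LGMNAL_MSGHDR_SIZE); /* handed to lib_parse() */
        *payload     = buffer + sizeof(lgmnal_msghdr_t)
                              + sizeof(ptl_hdr_t);                 /* copied into the caller's iovecs */
}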
Exiting\n", arg)); - return(-1); - } - - nal_data = (lgmnal_data_t*)arg; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread nal_data is [%p]\n", arg)); - - nal_data->rxthread_flag = LGMNAL_THREAD_STARTED; - while (nal_data->rxthread_flag == LGMNAL_THREAD_STARTED) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: lgmnal_receive_threads waiting for LGMNAL_CONTINUE flag\n")); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1024); - - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: calling daemonize\n")); - daemonize(); - LGMNAL_GM_LOCK(nal_data); - while(nal_data->rxthread_flag == LGMNAL_THREAD_CONTINUE) { - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Receive thread waiting\n")); - rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: receive thread got [%s]\n", lgmnal_rxevent(rxevent))); - if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Receive thread time to exit\n")); - break; - } - switch (GM_RECV_EVENT_TYPE(rxevent)) { - - case(GM_RECV_EVENT): - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: GM_RECV_EVENT\n")); - recv = (gm_recv_t*)&(rxevent->recv); - buffer = gm_ntohp(recv->buffer); - if (((lgmnal_msghdr_t*)buffer)->type == LGMNAL_SMALL_MESSAGE) { - LGMNAL_GM_UNLOCK(nal_data); - lgmnal_small_receive1(nal_data, recv); - LGMNAL_GM_LOCK(nal_data); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: Unsupported message type\n")); - lgmnal_badrx_message(nal_data, recv, NULL); - } - break; - case(_GM_SLEEP_EVENT): - /* - * Blocking receive above just returns - * immediatly with _GM_SLEEP_EVENT - * Don't know what this is - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Sleeping in gm_unknown\n")); - LGMNAL_GM_UNLOCK(nal_data); - gm_unknown(nal_data->gm_port, rxevent); - LGMNAL_GM_LOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Awake from gm_unknown\n")); - break; - - default: - /* - * Don't know what this is - * gm_unknown will make sense of it - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Passing event to gm_unknown\n")); - gm_unknown(nal_data->gm_port, rxevent); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Processed unknown event\n")); - - } - - - } - LGMNAL_GM_UNLOCK(nal_data); - nal_data->rxthread_flag = LGMNAL_THREAD_STOPPED; - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: The lgmnal_receive_thread nal_data [%p] is exiting\n", nal_data)); - return(LGMNAL_STATUS_OK); -} - - -int -lgmnal_small_transmit(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, - ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) -{ - lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; - lgmnal_stxd_t *stxd = NULL; - void *buffer = NULL; - lgmnal_msghdr_t *msghdr = NULL; - int tot_size = 0; - unsigned int local_nid; - gm_status_t gm_status = GM_SUCCESS; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_transmit nal_cb [%p] private [%p] cookie [%p] hdr [%p] type [%d] global_nid [%u][%x] pid [%d] niov [%d] iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, global_nid, global_nid, pid, niov, iov, size)); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: dest_nid [%lu], src_nid [%lu]\n", hdr->dest_nid, hdr->src_nid)); - - if (!nal_data) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_data\n")); - return(LGMNAL_STATUS_FAIL); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("nal_data [%p]\n", nal_data)); - } - - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_global_id_to_node_id(nal_data->gm_port, 
global_nid, &local_nid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to obtain local id\n")); - return(LGMNAL_STATUS_FAIL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local Node_id is [%u][%x]\n", local_nid, local_nid)); - - stxd = lgmnal_get_stxd(nal_data, 1); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); - - stxd->type = LGMNAL_SMALL_MESSAGE; - stxd->cookie = cookie; - - /* - * Copy lgmnal_msg_hdr and portals header to the transmit buffer - * Then copy the data in - */ - buffer = stxd->buffer; - msghdr = (lgmnal_msghdr_t*)buffer; - - msghdr->magic = LGMNAL_MAGIC; - msghdr->type = LGMNAL_SMALL_MESSAGE; - msghdr->sender_node_id = nal_data->gm_global_nid; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); - - buffer += sizeof(lgmnal_msghdr_t); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Advancing buffer pointer by [%x] to [%p]\n", sizeof(lgmnal_msghdr_t), buffer)); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); - gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); - - buffer += sizeof(ptl_hdr_t); - - while(niov--) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing iov [%p] len [%d] to [%p]\n", iov, iov->iov_len, buffer)); - gm_bcopy(iov->iov_base, buffer, iov->iov_len); - buffer+= iov->iov_len; - iov++; - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending\n")); - tot_size = size+sizeof(ptl_hdr_t)+sizeof(lgmnal_msghdr_t); - - - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_send_to_peer port [%p] buffer [%p] gmsize [%d] msize [%d] global_nid [%u][%x] local_nid[%d] stxd [%p]\n", - nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, global_nid, global_nid, local_nid, stxd)); - LGMNAL_GM_LOCK(nal_data); - gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, GM_LOW_PRIORITY, local_nid, lgmnal_small_tx_done, (void*)stxd); - - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); - - return(PTL_OK); -} - - -void -lgmnal_small_tx_done(gm_port_t *gm_port, void *context, gm_status_t status) -{ - lgmnal_stxd_t *stxd = (lgmnal_stxd_t*)context; - lib_msg_t *cookie = stxd->cookie; - lgmnal_data_t *nal_data = (lgmnal_data_t*)stxd->nal_data; - nal_cb_t *nal_cb = nal_data->nal_cb; - - if (!stxd) { - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("send completion event for unknown stxd\n")); - return; - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Result of send stxd [%p] is [%s]\n", stxd, lgmnal_gm_error(status))); - /* TO DO figure out which sends are worth retrying and get a send token to retry */ - if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Call to lib_finalize failed for stxd [%p]\n", stxd)); - } - lgmnal_return_stxd(nal_data, stxd); - return; -} - - -void -lgmnal_large_tx1_done(gm_port_t *gm_port, void *context, gm_status_t status) -{ - -} - -/* - * Begin a large transmit - */ -int -lgmnal_large_transmit1(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, - ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) -{ - - lgmnal_data_t *nal_data; - lgmnal_stxd_t *stxd = NULL; - void *buffer = NULL; - lgmnal_msghdr_t *msghdr = NULL; - unsigned int local_nid; - int mlen = 0; /* the size of the init message data */ - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_large_transmit1 nal_cb [%p] private [%p], cookie [%p] hdr [%p], type [%d] global_nid [%u], pid [%d], - niov [%d], iov [%p], size [%d]\n", - nal_cb, private, cookie, hdr, type, global_nid, pid, niov, iov, size)); - - if (nal_cb) - 
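As a compact reference for the packing that lgmnal_small_transmit() above performs inline, a hypothetical helper (sketch only; the field names mirror the lgmnal_msghdr_t usage shown, and gm_bcopy keeps the original (src, dst, len) argument order):

#include "lgmnal.h"

/* Hypothetical helper: build [lgmnal_msghdr_t][ptl_hdr_t][payload] in the
 * stxd's pre-registered DMA buffer and return the total byte count, i.e.
 * the tot_size value given to gm_send_to_peer_with_callback(). */
static int
sketch_pack_small_msg(lgmnal_stxd_t *stxd, unsigned int sender_gnid,
                      ptl_hdr_t *hdr, unsigned int niov, struct iovec *iov)
{
        char *buffer = stxd->buffer;
        lgmnal_msghdr_t *msghdr = (lgmnal_msghdr_t *)buffer;

        msghdr->magic          = LGMNAL_MAGIC;          /* sanity check for the receiver */
        msghdr->type           = LGMNAL_SMALL_MESSAGE;  /* rx thread dispatches on this */
        msghdr->sender_node_id = sender_gnid;           /* sender's GM global node id */
        buffer += sizeof(lgmnal_msghdr_t);

        gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t));       /* portals header follows the NAL header */
        buffer += sizeof(ptl_hdr_t);

        while (niov--) {                                /* payload fragments, back to back */
                gm_bcopy(iov->iov_base, buffer, iov->iov_len);
                buffer += iov->iov_len;
                iov++;
        }

        return buffer - (char *)stxd->buffer;
}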
nal_data = (lgmnal_data_t*)nal_cb->nal_data; - else { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_cb.\n")); - return(LGMNAL_STATUS_FAIL); - } - - - /* - * TO DO large transmit uses stxd. Should it have control descriptor? - */ - stxd = lgmnal_get_stxd(nal_data, 1); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); - - stxd->type = LGMNAL_LARGE_MESSAGE_INIT; - stxd->cookie = cookie; - - /* - * Copy lgmnal_msg_hdr and portals header to the transmit buffer - * Then copy the iov in - */ - buffer = stxd->buffer; - msghdr = (lgmnal_msghdr_t*)buffer; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); - - msghdr->magic = LGMNAL_MAGIC; - msghdr->type = LGMNAL_LARGE_MESSAGE_INIT; - msghdr->sender_node_id = nal_data->gm_global_nid; - msghdr->stxd = stxd; - buffer += sizeof(lgmnal_msghdr_t); - mlen = sizeof(lgmnal_msghdr_t); - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); - - gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); - buffer += sizeof(ptl_hdr_t); - mlen += sizeof(ptl_hdr_t); - - /* - * Store the iovs in the stxd for we can get them later - * in large_transmit2 - */ - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Copying iov [%p] to [%p]\n", iov, stxd->iov)); - gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec)); - stxd->niov = niov; - - /* - * Send the init message to the target - */ - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending mlen [%d]\n", mlen)); - LGMNAL_GM_LOCK(nal_data); - gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, mlen, GM_LOW_PRIORITY, local_nid, lgmnal_large_tx1_done, (void*)stxd); - LGMNAL_GM_UNLOCK(nal_data); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); - - return(PTL_OK); -} - - - - -EXPORT_SYMBOL(lgmnal_requeue_rxbuffer); -EXPORT_SYMBOL(lgmnal_badrx_message); -EXPORT_SYMBOL(lgmnal_large_tx1_done); -EXPORT_SYMBOL(lgmnal_large_transmit1); -EXPORT_SYMBOL(lgmnal_small_receive1); -EXPORT_SYMBOL(lgmnal_small_receive2); -EXPORT_SYMBOL(lgmnal_receive_thread); -EXPORT_SYMBOL(lgmnal_small_transmit); -EXPORT_SYMBOL(lgmnal_small_tx_done); diff --git a/lustre/portals/knals/lgmnal/lgmnal_module.c b/lustre/portals/knals/lgmnal/lgmnal_module.c deleted file mode 100644 index ce870f0..0000000 --- a/lustre/portals/knals/lgmnal/lgmnal_module.c +++ /dev/null @@ -1,137 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#include "lgmnal.h" - - -ptl_handle_ni_t lgmnal_ni; - - -int -lgmnal_cmd(struct portal_ioctl_data *data, void *private) -{ - lgmnal_data_t *nal_data = NULL; - char *name = NULL; - int nid = -2; - int gnid; - gm_status_t gm_status; - - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cmd [d] private [%p]\n", data->ioc_nal_cmd, private)); - nal_data = (lgmnal_data_t*)private; - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("nal_data is [%p]\n", nal_data)); - switch(data->ioc_nal_cmd) { - /* - * just reuse already defined GET_NID. Should define LGMNAL version - */ - case(LGMNAL_IOC_GET_GNID): - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_cmd GETNID (Get GM Global Network Id\n")); - - PORTAL_ALLOC(name, data->ioc_plen1); - copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1); - - LGMNAL_GM_LOCK(nal_data); - nid = gm_host_name_to_node_id(nal_data->gm_port, name); - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%d]\n", nid)); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_node_id_to_global_id(nal_data->gm_port, nid, &gnid); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_node_id_to_global_id failed\n", gm_status)); - return(-1); - } - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node is is [%u][%x]\n", gnid, gnid)); - copy_to_user(data->ioc_pbuf2, &gnid, data->ioc_plen2); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_cmd UNKNOWN[%d]\n", data->ioc_nal_cmd)); - data->ioc_nid2 = -1; - } - - - return(0); -} - -int lgmnal_small_msg_size = 81920; -int lgmnal_debug_level = 1; - -int -init_module() -{ - int status; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("This is the lgmnal module initialisation routine\n")); - - - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling lgmnal_init\n")); - status = PtlNIInit(lgmnal_init, 32, 4, 0, &lgmnal_ni); - if (status == PTL_OK) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL initialised ok lgmnal_ni [%lx]\n", lgmnal_ni)); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL Failed to initialise\n")); - return(1); - - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kportal_nal_register\n")); - /* - * global_nal_data is set by lgmnal_init - */ - if (kportal_nal_register(LGMNAL, &lgmnal_cmd, global_nal_data) != 0) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("kportal_nal_register failed\n")); - return(1); - } - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling PORTAL_SYMBOL_REGISTER\n")); - PORTAL_SYMBOL_REGISTER(lgmnal_ni); - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is the end of the lgmnal module initialisation routine")); - - - return(0); -} - - -void cleanup_module() -{ - int interface=0; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Cleaning up lgmnal module")); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Interface [%d] Calling shutdown\n", interface)); - kportal_nal_unregister(LGMNAL); - PORTAL_SYMBOL_UNREGISTER(lgmnal_ni); - lgmnal_fini(); - global_nal_data = NULL; - return; -} - - -EXPORT_SYMBOL(lgmnal_ni); -EXPORT_SYMBOL(lgmnal_debug_level); - -MODULE_PARM(lgmnal_small_msg_size, "i"); -MODULE_PARM(lgmnal_debug_level, "i"); - -MODULE_AUTHOR("Morgan Doyle. morgan.doyle@hp.com"); - -MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM2. 
[0= DEFAULT_LEN) { - PORTAL_ALLOC(varbuf, len+1+16); - if (!varbuf) { - printk("LustreError: lgmnal_cb_printf Failed to malloc\n"); - printk("Lustre: Truncated message is\n"); - printk(fixedbuf); - va_end(ap); - return; - } - sprintf(varbuf, "Lustre: LGMNAL::"); - len = vsnprintf(varbuf+16, len+1, fmt, ap); - } else { - varbuf = fixedbuf; - } - va_end(ap); - printk(varbuf); - if (fixedbuf != varbuf) - PORTAL_FREE(varbuf, len+1+16); - return; -} - - -/* - * allocate a number of small tx buffers and register with GM - * so they are wired and set up for DMA. This is a costly operation. - * Also allocate a corrosponding descriptor to keep track of - * the buffer. - * Put all descriptors on singly linked list to be available to send function. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the txd spinlock. - */ -int -lgmnal_alloc_stxd(lgmnal_data_t *nal_data) -{ - int ntx = 0, nstx = 0, i = 0; - lgmnal_stxd_t *txd = NULL; - void *txbuffer = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small tx\n")); - - LGMNAL_GM_LOCK(nal_data); - ntx = gm_num_send_tokens(nal_data->gm_port); - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of send tokens available is [%d]\n", ntx)); - - nstx = ntx/2; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] send tokens to small messages\n", nstx)); - - -#ifdef LGMNAL_USE_GM_HASH - nal_data->stxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nstx, 0); - if (!nal_data->srxd_hash) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n\n")); - return(LGMNAL_STATUS_NOMEM); - } -#else - nal_data->stxd_hash = NULL; -#endif - - /* - * A semaphore is initialised with the - * number of transmit tokens available. - * To get a stxd, acquire the token semaphore. 
- * this decrements the available token count - * (if no tokens you block here, someone returning a - * stxd will release the semaphore and wake you) - * When token is obtained acquire the spinlock - * to manipulate the list - */ - LGMNAL_TXD_TOKEN_INIT(nal_data, nstx); - LGMNAL_TXD_LOCK_INIT(nal_data); - - for (i=0; i<=nstx; i++) { - PORTAL_ALLOC(txd, sizeof(lgmnal_stxd_t)); - if (!txd) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txd [%d]\n", i)); - return(LGMNAL_STATUS_NOMEM); - } -#if 0 - PORTAL_ALLOC(txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - if (!txbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); - return(LGMNAL_STATUS_FAIL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] txbuffer [%p], size [%d]\n", - nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_register_memory(nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", txbuffer, i)); - switch(gm_status) { - case(GM_FAILURE): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); - break; - case(GM_PERMISSION_DENIED): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); - break; - case(GM_INVALID_PARAMETER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown error\n")); - break; - } - return(LGMNAL_STATUS_FAIL); - } else { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_register_memory ok for buffer [%p], index [%d]\n", txbuffer, i)); - } -#else - LGMNAL_GM_LOCK(nal_data); - txbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (!txbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); - return(LGMNAL_STATUS_FAIL); - } -#endif - - txd->buffer = txbuffer; - txd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); - txd->gmsize = gm_min_size_for_length(txd->size); - txd->nal_data = (struct _lgmnal_data_t*)nal_data; - - if (lgmnal_hash_add(&nal_data->stxd_hash, (void*)txbuffer, (void*)txd)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry\n")); - return(LGMNAL_STATUS_FAIL); - } - - - txd->next = nal_data->stxd; - nal_data->stxd = txd; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); - } - - return(LGMNAL_STATUS_OK); -} - -/* Free the list of wired and gm_registered small tx buffers and the tx descriptors - that go along with them. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the txd spinlock. 
- */ -void -lgmnal_free_stxd(lgmnal_data_t *nal_data) -{ - lgmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small tx\n")); - - while(txd) { - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); - _txd = txd; - txd = txd->next; -#if 0 - LGMNAL_GM_LOCK(nal_data); - gm_deregister_memory(nal_data->gm_port, _txd->buffer, _txd->size); - LGMNAL_GM_UNLOCK(nal_data); - PORTAL_FREE(_txd->buffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); -#else - LGMNAL_GM_LOCK(nal_data); - gm_dma_free(nal_data->gm_port, _txd->buffer); - LGMNAL_GM_UNLOCK(nal_data); -#endif - PORTAL_FREE(_txd, sizeof(lgmnal_stxd_t)); - } - return; -} - - -/* - * Get a txd from the list - * This get us a wired and gm_registered small tx buffer. - * This implicitly gets us a send token also. - */ -lgmnal_stxd_t * -lgmnal_get_stxd(lgmnal_data_t *nal_data, int block) -{ - - lgmnal_stxd_t *txd = NULL; - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_stxd nal_data [%p] block[%d]\n", - nal_data, block)); - - if (block) { - LGMNAL_TXD_GETTOKEN(nal_data); - } else { - if (LGMNAL_TXD_TRYGETTOKEN(nal_data)) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_stxd can't get token\n")); - return(NULL); - } - } - LGMNAL_TXD_LOCK(nal_data); - txd = nal_data->stxd; - if (txd) - nal_data->stxd = txd->next; - LGMNAL_TXD_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_stxd got [%p], head is [%p]\n", txd, nal_data->stxd)); - return(txd); -} - -/* - * Return a txd to the list - */ -void -lgmnal_return_stxd(lgmnal_data_t *nal_data, lgmnal_stxd_t *txd) -{ - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_stxd nal_data [%p], txd[%p]\n", nal_data, txd)); - - LGMNAL_TXD_LOCK(nal_data); - txd->next = nal_data->stxd; - nal_data->stxd = txd; - LGMNAL_TXD_UNLOCK(nal_data); - LGMNAL_TXD_RETURNTOKEN(nal_data); - return; -} - - -/* - * allocate a number of small rx buffers and register with GM - * so they are wired and set up for DMA. This is a costly operation. - * Also allocate a corrosponding descriptor to keep track of - * the buffer. - * Put all descriptors on singly linked list to be available to receive thread. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the rxd spinlock. 
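The LGMNAL_TXD_* token and lock macros used by lgmnal_get_stxd()/lgmnal_return_stxd() above are defined in lgmnal.h rather than here; going by the comment in lgmnal_alloc_stxd(), a plausible expansion looks like the sketch below, assuming a 2.4-style struct semaphore for the token count and a spinlock guarding the list head (the field names stxd_token and stxd_lock are illustrative).

#include "lgmnal.h"

/* Sketch only: stxd_token / stxd_lock are illustrative field names. */
static lgmnal_stxd_t *
sketch_get_stxd(lgmnal_data_t *nal_data, int block)
{
        lgmnal_stxd_t *txd;

        if (block)
                down(&nal_data->stxd_token);            /* sleep until a send token is free */
        else if (down_trylock(&nal_data->stxd_token))
                return NULL;                            /* non-blocking caller: no token, no txd */

        spin_lock(&nal_data->stxd_lock);                /* short critical section: pop the head */
        txd = nal_data->stxd;
        if (txd)
                nal_data->stxd = txd->next;
        spin_unlock(&nal_data->stxd_lock);

        return txd;
}

static void
sketch_return_stxd(lgmnal_data_t *nal_data, lgmnal_stxd_t *txd)
{
        spin_lock(&nal_data->stxd_lock);                /* push back onto the free list */
        txd->next = nal_data->stxd;
        nal_data->stxd = txd;
        spin_unlock(&nal_data->stxd_lock);

        up(&nal_data->stxd_token);                      /* wake any blocked getter */
}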
- */ -int -lgmnal_alloc_srxd(lgmnal_data_t *nal_data) -{ - int nrx = 0, nsrx = 0, i = 0; - lgmnal_srxd_t *rxd = NULL; - void *rxbuffer = NULL; - - LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small rx\n")); - - LGMNAL_GM_LOCK(nal_data); - nrx = gm_num_receive_tokens(nal_data->gm_port); - LGMNAL_GM_UNLOCK(nal_data); - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of receive tokens available is [%d]\n", nrx)); - - nsrx = nrx/2; - - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] receive tokens to small messages\n", nsrx)); - - -#ifdef LGMNAL_USE_GM_HASH - LGMNAL_GM_LOCK(nal_data); - nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nsrx, 0); - LGMNAL_GM_UNLOCK(nal_data); - if (!nal_data->srxd_hash) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n")); - return(LGMNAL_STATUS_NOMEM); - } -#else - nal_data->srxd_hash = NULL; -#endif - - LGMNAL_RXD_TOKEN_INIT(nal_data, nsrx); - LGMNAL_RXD_LOCK_INIT(nal_data); - - for (i=0; i<=nsrx; i++) { - PORTAL_ALLOC(rxd, sizeof(lgmnal_srxd_t)); - if (!rxd) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxd [%d]\n", i)); - return(LGMNAL_STATUS_NOMEM); - } -#if 0 - PORTAL_ALLOC(rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - if (!rxbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); - return(LGMNAL_STATUS_FAIL); - } - LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] rxbuffer [%p], size [%d]\n", - nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); - LGMNAL_GM_LOCK(nal_data); - gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (gm_status != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", rxbuffer, i)); - switch(gm_status) { - case(GM_FAILURE): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); - break; - case(GM_PERMISSION_DENIED): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); - break; - case(GM_INVALID_PARAMETER): - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); - break; - default: - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown GM error[%d]\n", gm_status)); - break; - - } - return(LGMNAL_STATUS_FAIL); - } -#else - LGMNAL_GM_LOCK(nal_data); - rxbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); - LGMNAL_GM_UNLOCK(nal_data); - if (!rxbuffer) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); - PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); - return(LGMNAL_STATUS_FAIL); - } -#endif - - rxd->buffer = rxbuffer; - rxd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); - rxd->gmsize = gm_min_size_for_length(rxd->size); - - if (lgmnal_hash_add(&nal_data->srxd_hash, (void*)rxbuffer, (void*)rxd) != GM_SUCCESS) { - LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry rxd[%p] for rxbuffer[%p]\n", rxd, rxbuffer)); - return(LGMNAL_STATUS_FAIL); - } - - rxd->next = nal_data->srxd; - nal_data->srxd = rxd; - LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); - } - - return(LGMNAL_STATUS_OK); -} - - - -/* Free the list of wired and gm_registered small rx buffers and the rx descriptors - * that go along with them. - * This function is only called when the API mutex is held (init or shutdown), - * so there is no need to hold the rxd spinlock. 
- */
-void
-lgmnal_free_srxd(lgmnal_data_t *nal_data)
-{
-        lgmnal_srxd_t   *rxd = nal_data->srxd, *_rxd = NULL;
-
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small rx\n"));
-
-        while(rxd) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size));
-                _rxd = rxd;
-                rxd = rxd->next;
-
-#if 0
-                LGMNAL_GM_LOCK(nal_data);
-                gm_deregister_memory(nal_data->gm_port, _rxd->buffer, _rxd->size);
-                LGMNAL_GM_UNLOCK(nal_data);
-                PORTAL_FREE(_rxd->buffer, LGMNAL_SMALL_RXBUFFER_SIZE);
-#else
-                LGMNAL_GM_LOCK(nal_data);
-                gm_dma_free(nal_data->gm_port, _rxd->buffer);
-                LGMNAL_GM_UNLOCK(nal_data);
-#endif
-                PORTAL_FREE(_rxd, sizeof(lgmnal_srxd_t));
-        }
-        return;
-}
-
-
-/*
- * Get a rxd from the free list
- * This get us a wired and gm_registered small rx buffer.
- * This implicitly gets us a receive token also.
- */
-lgmnal_srxd_t *
-lgmnal_get_srxd(lgmnal_data_t *nal_data, int block)
-{
-
-        lgmnal_srxd_t   *rxd = NULL;
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_srxd nal_data [%p] block [%d]\n", nal_data, block));
-
-        if (block) {
-                LGMNAL_RXD_GETTOKEN(nal_data);
-        } else {
-                if (LGMNAL_RXD_TRYGETTOKEN(nal_data)) {
-                        LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_srxd Can't get token\n"));
-                        return(NULL);
-                }
-        }
-        LGMNAL_RXD_LOCK(nal_data);
-        rxd = nal_data->srxd;
-        if (rxd)
-                nal_data->srxd = rxd->next;
-        LGMNAL_RXD_UNLOCK(nal_data);
-        LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_srxd got [%p], head is [%p]\n", rxd, nal_data->srxd));
-        return(rxd);
-}
-
-/*
- * Return an rxd to the list
- */
-void
-lgmnal_return_srxd(lgmnal_data_t *nal_data, lgmnal_srxd_t *rxd)
-{
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_srxd nal_data [%p], rxd[%p]\n", nal_data, rxd));
-
-        LGMNAL_RXD_LOCK(nal_data);
-        rxd->next = nal_data->srxd;
-        nal_data->srxd = rxd;
-        LGMNAL_RXD_UNLOCK(nal_data);
-        LGMNAL_RXD_RETURNTOKEN(nal_data);
-        return;
-}
-
-/*
- * Given a pointer to a srxd find
- * the relevant descriptor for it
- * This is done by searching a hash
- * list that is created when the srxd's
- * are created
- */
-lgmnal_srxd_t *
-lgmnal_rxbuffer_to_srxd(lgmnal_data_t *nal_data, void *rxbuffer)
-{
-        lgmnal_srxd_t   *srxd = NULL;
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_rxbuffer_to_srxd nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer));
-#ifdef LGMNAL_USE_GM_HASH
-        srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer);
-#else
-        srxd = lgmnal_hash_find(nal_data->srxd_hash, rxbuffer);
-#endif
-        LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("srxd is [%p]\n", srxd));
-        return(srxd);
-}
-
-
-void
-lgmnal_stop_rxthread(lgmnal_data_t *nal_data)
-{
-        int     delay = 15;
-
-
-
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Attempting to stop rxthread nal_data [%p]\n", nal_data));
-
-        if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("thread flag not correctly set\n"));
-        }
-
-        nal_data->rxthread_flag = LGMNAL_THREAD_STOP;
-        LGMNAL_GM_LOCK(nal_data);
-        gm_set_alarm(nal_data->gm_port, &nal_data->rxthread_alarm, 10, NULL, NULL);
-        LGMNAL_GM_UNLOCK(nal_data);
-
-        while(nal_data->rxthread_flag == LGMNAL_THREAD_STOP && delay--) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_stop_rxthread sleeping\n"));
-                current->state = TASK_INTERRUPTIBLE;
-                schedule_timeout(1024);
-        }
-
-        if (nal_data->rxthread_flag == LGMNAL_THREAD_STOP) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("I DON'T KNOW HOW TO WAKE THE THREAD\n"));
-        } else {
-                LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RX THREAD SEEMS TO HAVE STOPPED\n"));
-        }
-
-}
-
-
-
-char *
-lgmnal_gm_error(gm_status_t status)
-{
-        switch(status) {
-        case(GM_SUCCESS):
-                return("SUCCESS");
-        case(GM_FAILURE):
-                return("FAILURE");
-        case(GM_INPUT_BUFFER_TOO_SMALL):
-                return("INPUT_BUFFER_TOO_SMALL");
-        case(GM_OUTPUT_BUFFER_TOO_SMALL):
-                return("OUTPUT_BUFFER_TOO_SMALL");
-        case(GM_TRY_AGAIN ):
-                return("TRY_AGAIN");
-        case(GM_BUSY):
-                return("BUSY");
-        case(GM_MEMORY_FAULT):
-                return("MEMORY_FAULT");
-        case(GM_INTERRUPTED):
-                return("INTERRUPTED");
-        case(GM_INVALID_PARAMETER):
-                return("INVALID_PARAMETER");
-        case(GM_OUT_OF_MEMORY):
-                return("OUT_OF_MEMORY");
-        case(GM_INVALID_COMMAND):
-                return("INVALID_COMMAND");
-        case(GM_PERMISSION_DENIED):
-                return("PERMISSION_DENIED");
-        case(GM_INTERNAL_ERROR):
-                return("INTERNAL_ERROR");
-        case(GM_UNATTACHED):
-                return("UNATTACHED");
-        case(GM_UNSUPPORTED_DEVICE):
-                return("UNSUPPORTED_DEVICE");
-        case(GM_SEND_TIMED_OUT):
-                return("GM_SEND_TIMEDOUT");
-        case(GM_SEND_REJECTED):
-                return("GM_SEND_REJECTED");
-        case(GM_SEND_TARGET_PORT_CLOSED):
-                return("GM_SEND_TARGET_PORT_CLOSED");
-        case(GM_SEND_TARGET_NODE_UNREACHABLE):
-                return("GM_SEND_TARGET_NODE_UNREACHABLE");
-        case(GM_SEND_DROPPED):
-                return("GM_SEND_DROPPED");
-        case(GM_SEND_PORT_CLOSED):
-                return("GM_SEND_PORT_CLOSED");
-        case(GM_NODE_ID_NOT_YET_SET):
-                return("GM_NODE_ID_NOT_YET_SET");
-        case(GM_STILL_SHUTTING_DOWN):
-                return("GM_STILL_SHUTTING_DOWN");
-        case(GM_CLONE_BUSY):
-                return("GM_CLONE_BUSY");
-        case(GM_NO_SUCH_DEVICE):
-                return("GM_NO_SUCH_DEVICE");
-        case(GM_ABORTED):
-                return("GM_ABORTED");
-        case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                return("GM_INCOMPATIBLE_LIB_AND_DRIVER");
-        case(GM_UNTRANSLATED_SYSTEM_ERROR):
-                return("GM_UNTRANSLATED_SYSTEM_ERROR");
-        case(GM_ACCESS_DENIED):
-                return("GM_ACCESS_DENIED");
-
-
-/*
- * These ones are in the docs but aren't in the header file
-        case(GM_DEV_NOT_FOUND):
-                return("GM_DEV_NOT_FOUND");
-        case(GM_INVALID_PORT_NUMBER):
-                return("GM_INVALID_PORT_NUMBER");
-        case(GM_UC_ERROR):
-                return("GM_US_ERROR");
-        case(GM_PAGE_TABLE_FULL):
-                return("GM_PAGE_TABLE_FULL");
-        case(GM_MINOR_OVERFLOW):
-                return("GM_MINOR_OVERFLOW");
-        case(GM_SEND_ORPHANED):
-                return("GM_SEND_ORPHANED");
-        case(GM_HARDWARE_FAULT):
-                return("GM_HARDWARE_FAULT");
-        case(GM_DATA_CORRUPTED):
-                return("GM_DATA_CORRUPTED");
-        case(GM_TIMED_OUT):
-                return("GM_TIMED_OUT");
-        case(GM_USER_ERROR):
-                return("GM_USER_ERROR");
-        case(GM_NO_MATCH):
-                return("GM_NOMATCH");
-        case(GM_NOT_SUPPORTED_IN_KERNEL):
-                return("GM_NOT_SUPPORTED_IN_KERNEL");
-        case(GM_NOT_SUPPORTED_ON_ARCH):
-                return("GM_NOT_SUPPORTED_ON_ARCH");
-        case(GM_PTE_REF_CNT_OVERFLOW):
-                return("GM_PTR_REF_CNT_OVERFLOW");
-        case(GM_NO_DRIVER_SUPPORT):
-                return("GM_NO_DRIVER_SUPPORT");
-        case(GM_FIRMWARE_NOT_RUNNING):
-                return("GM_FIRMWARE_NOT_RUNNING");
-
- * These ones are in the docs but aren't in the header file
- */
-        default:
-                return("UNKNOWN GM ERROR CODE");
-        }
-}
-
-
-char *
-lgmnal_rxevent(gm_recv_event_t *ev)
-{
-        short   event;
-        char    msg[24];
-        event = GM_RECV_EVENT_TYPE(ev);
-        switch(event) {
-        case(GM_NO_RECV_EVENT):
-                return("GM_NO_RECV_EVENT");
-        case(GM_SENDS_FAILED_EVENT):
-                return("GM_SEND_FAILED_EVENT");
-        case(GM_ALARM_EVENT):
-                return("GM_ALARM_EVENT");
-        case(GM_SENT_EVENT):
-                return("GM_SENT_EVENT");
-        case(_GM_SLEEP_EVENT):
-                return("_GM_SLEEP_EVENT");
-        case(GM_RAW_RECV_EVENT):
-                return("GM_RAW_RECV_EVENT");
-        case(GM_BAD_SEND_DETECTED_EVENT):
-                return("GM_BAD_SEND_DETECTED_EVENT");
-        case(GM_SEND_TOKEN_VIOLATION_EVENT):
-                return("GM_SEND_TOKEN_VIOLATION_EVENT");
-        case(GM_RECV_TOKEN_VIOLATION_EVENT):
-                return("GM_RECV_TOKEN_VIOLATION_EVENT");
-        case(GM_BAD_RECV_TOKEN_EVENT):
-                return("GM_BAD_RECV_TOKEN_EVENT");
-        case(GM_ALARM_VIOLATION_EVENT):
-                return("GM_ALARM_VIOLATION_EVENT");
-        case(GM_RECV_EVENT):
-                return("GM_RECV_EVENT");
-        case(GM_HIGH_RECV_EVENT):
-                return("GM_HIGH_RECV_EVENT");
-        case(GM_PEER_RECV_EVENT):
-                return("GM_PEER_RECV_EVENT");
-        case(GM_HIGH_PEER_RECV_EVENT):
-                return("GM_HIGH_PEER_RECV_EVENT");
-        case(GM_FAST_RECV_EVENT):
-                return("GM_FAST_RECV_EVENT");
-        case(GM_FAST_HIGH_RECV_EVENT):
-                return("GM_FAST_HIGH_RECV_EVENT");
-        case(GM_FAST_PEER_RECV_EVENT):
-                return("GM_FAST_PEER_RECV_EVENT");
-        case(GM_FAST_HIGH_PEER_RECV_EVENT):
-                return("GM_FAST_HIGH_PEER_RECV_EVENT");
-        case(GM_REJECTED_SEND_EVENT):
-                return("GM_REJECTED_SEND_EVENT");
-        case(GM_ORPHANED_SEND_EVENT):
-                return("GM_ORPHANED_SEND_EVENT");
-        case(GM_BAD_RESEND_DETECTED_EVENT):
-                return("GM_BAD_RESEND_DETETED_EVENT");
-        case(GM_DROPPED_SEND_EVENT):
-                return("GM_DROPPED_SEND_EVENT");
-        case(GM_BAD_SEND_VMA_EVENT):
-                return("GM_BAD_SEND_VMA_EVENT");
-        case(GM_BAD_RECV_VMA_EVENT):
-                return("GM_BAD_RECV_VMA_EVENT");
-        case(_GM_FLUSHED_ALARM_EVENT):
-                return("GM_FLUSHED_ALARM_EVENT");
-        case(GM_SENT_TOKENS_EVENT):
-                return("GM_SENT_TOKENS_EVENTS");
-        case(GM_IGNORE_RECV_EVENT):
-                return("GM_IGNORE_RECV_EVENT");
-        case(GM_ETHERNET_RECV_EVENT):
-                return("GM_ETHERNET_RECV_EVENT");
-        case(GM_NEW_NO_RECV_EVENT):
-                return("GM_NEW_NO_RECV_EVENT");
-        case(GM_NEW_SENDS_FAILED_EVENT):
-                return("GM_NEW_SENDS_FAILED_EVENT");
-        case(GM_NEW_ALARM_EVENT):
-                return("GM_NEW_ALARM_EVENT");
-        case(GM_NEW_SENT_EVENT):
-                return("GM_NEW_SENT_EVENT");
-        case(_GM_NEW_SLEEP_EVENT):
-                return("GM_NEW_SLEEP_EVENT");
-        case(GM_NEW_RAW_RECV_EVENT):
-                return("GM_NEW_RAW_RECV_EVENT");
-        case(GM_NEW_BAD_SEND_DETECTED_EVENT):
-                return("GM_NEW_BAD_SEND_DETECTED_EVENT");
-        case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT):
-                return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT");
-        case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT):
-                return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT");
-        case(GM_NEW_BAD_RECV_TOKEN_EVENT):
-                return("GM_NEW_BAD_RECV_TOKEN_EVENT");
-        case(GM_NEW_ALARM_VIOLATION_EVENT):
-                return("GM_NEW_ALARM_VIOLATION_EVENT");
-        case(GM_NEW_RECV_EVENT):
-                return("GM_NEW_RECV_EVENT");
-        case(GM_NEW_HIGH_RECV_EVENT):
-                return("GM_NEW_HIGH_RECV_EVENT");
-        case(GM_NEW_PEER_RECV_EVENT):
-                return("GM_NEW_PEER_RECV_EVENT");
-        case(GM_NEW_HIGH_PEER_RECV_EVENT):
-                return("GM_NEW_HIGH_PEER_RECV_EVENT");
-        case(GM_NEW_FAST_RECV_EVENT):
-                return("GM_NEW_FAST_RECV_EVENT");
-        case(GM_NEW_FAST_HIGH_RECV_EVENT):
-                return("GM_NEW_FAST_HIGH_RECV_EVENT");
-        case(GM_NEW_FAST_PEER_RECV_EVENT):
-                return("GM_NEW_FAST_PEER_RECV_EVENT");
-        case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT):
-                return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT");
-        case(GM_NEW_REJECTED_SEND_EVENT):
-                return("GM_NEW_REJECTED_SEND_EVENT");
-        case(GM_NEW_ORPHANED_SEND_EVENT):
-                return("GM_NEW_ORPHANED_SEND_EVENT");
-        case(_GM_NEW_PUT_NOTIFICATION_EVENT):
-                return("_GM_NEW_PUT_NOTIFICATION_EVENT");
-        case(GM_NEW_FREE_SEND_TOKEN_EVENT):
-                return("GM_NEW_FREE_SEND_TOKEN_EVENT");
-        case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT):
-                return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT");
-        case(GM_NEW_BAD_RESEND_DETECTED_EVENT):
-                return("GM_NEW_BAD_RESEND_DETECTED_EVENT");
-        case(GM_NEW_DROPPED_SEND_EVENT):
-                return("GM_NEW_DROPPED_SEND_EVENT");
-        case(GM_NEW_BAD_SEND_VMA_EVENT):
-                return("GM_NEW_BAD_SEND_VMA_EVENT");
-        case(GM_NEW_BAD_RECV_VMA_EVENT):
-                return("GM_NEW_BAD_RECV_VMA_EVENT");
-        case(_GM_NEW_FLUSHED_ALARM_EVENT):
-                return("GM_NEW_FLUSHED_ALARM_EVENT");
-        case(GM_NEW_SENT_TOKENS_EVENT):
-                return("GM_NEW_SENT_TOKENS_EVENT");
-        case(GM_NEW_IGNORE_RECV_EVENT):
-                return("GM_NEW_IGNORE_RECV_EVENT");
-        case(GM_NEW_ETHERNET_RECV_EVENT):
-                return("GM_NEW_ETHERNET_RECV_EVENT");
-        default:
-                snprintf(msg, 24, "Unknown Recv event [%d]", event);
-                return(msg);
-#if 0
-        case(/* _GM_PUT_NOTIFICATION_EVENT */
-        case(/* GM_FREE_SEND_TOKEN_EVENT */
-        case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */
-#endif
-        }
-}
-
-
-void
-lgmnal_yield(int delay)
-{
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(delay);
-}
-
-int
-lgmnal_is_small_message(lgmnal_data_t *nal_data, int niov, struct iovec *iov, int len)
-{
-
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_is_small_message len is [%d]\n", len));
-        if (len < LGMNAL_SMALL_MSG_SIZE(nal_data)) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Yep, small message]\n"));
-                return(1);
-        } else {
-                LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No, not small message]\n"));
-                return(0);
-        }
-}
-
-void *
-lgmnal_hash_find(lgmnal_hash_t *hash, void *key)
-{
-        void    *data = NULL;
-        int     count = 0;
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_find hash [%p] key [%p]\n", hash, key));
-
-        while (hash) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find Stepping [%d]\n", count++));
-                if (hash->key == key) {
-                        data = hash->data;
-                        LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find hash got data[%p]\n", data));
-                        return(data);
-                } else
-                        hash = hash->next;
-        }
-        LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find data not found\n"));
-        return(NULL);
-}
-
-/*
- * TO DO hash. figure out why getting bad stuff from gm_hash and thne use it.
- */
-
-int
-lgmnal_hash_add(lgmnal_hash_t **hash, void *key, void *data)
-{
-
-#ifdef LGMNAL_USE_GM_HASH
-        return(gm_hash_insert(*hash, (void*)key, (void*)data);
-#else
-        lgmnal_hash_t   *new = NULL;
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_add hash [%p]\n", *hash));
-        PORTAL_ALLOC(new, sizeof(lgmnal_hash_t));
-        memset(new, 0, sizeof(lgmnal_hash_t));
-        if (!new) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_hash_add :: can't get memory\n"));
-                return(-1);
-        }
-        new->data = data;
-        new->key = key;
-        new->next = *hash;
-        *hash = new;
-        LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_add hash head [%p]\n", *hash));
-        return(0);
-#endif
-}
-
-void
-lgmnal_hash_free(lgmnal_hash_t **hash)
-{
-
-        lgmnal_hash_t   *_hash = NULL;
-        LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_free hash [p%]\n", *hash));
-
-        while (*hash) {
-                LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_free freeing hash [p%]\n", _hash));
-                _hash = *hash;
-                *hash = _hash->next;
-                PORTAL_FREE(_hash, sizeof(lgmnal_hash_t));
-        }
-        return;
-}
-
-
-EXPORT_SYMBOL(lgmnal_yield);
-EXPORT_SYMBOL(lgmnal_print);
-EXPORT_SYMBOL(lgmnal_alloc_srxd);
-EXPORT_SYMBOL(lgmnal_get_srxd);
-EXPORT_SYMBOL(lgmnal_return_srxd);
-EXPORT_SYMBOL(lgmnal_free_srxd);
-EXPORT_SYMBOL(lgmnal_alloc_stxd);
-EXPORT_SYMBOL(lgmnal_get_stxd);
-EXPORT_SYMBOL(lgmnal_return_stxd);
-EXPORT_SYMBOL(lgmnal_free_stxd);
-EXPORT_SYMBOL(lgmnal_rxbuffer_to_srxd);
-EXPORT_SYMBOL(lgmnal_rxevent);
-EXPORT_SYMBOL(lgmnal_gm_error);
-EXPORT_SYMBOL(lgmnal_stop_rxthread);
diff --git a/lustre/portals/tests/startclient.sh b/lustre/portals/tests/startclient.sh
index c9b7c16..de01bc7 100755
--- a/lustre/portals/tests/startclient.sh
+++ b/lustre/portals/tests/startclient.sh
@@ -29,9 +29,16 @@ case "$1" in
 		/sbin/insmod ./$PING
 		echo kqswnal > /tmp/nal
 		;;
+
+	gm)
+		/sbin/insmod portals
+		/sbin/insmod kgmnal
+		/sbin/insmod ./$PING
+		echo kgmnal > /tmp/nal
+		;;
 	*)
-		echo "Usage : ${0} < tcp | toe | elan >"
+		echo "Usage : ${0} < tcp | toe | elan | gm>"
 		exit 1;
 esac
 exit 0;
diff --git a/lustre/portals/tests/startserver.sh b/lustre/portals/tests/startserver.sh
index 942300e..4f66eeb 100755
--- a/lustre/portals/tests/startserver.sh
+++ b/lustre/portals/tests/startserver.sh
@@ -29,9 +29,16 @@ case "$1" in
 		/sbin/insmod ./$PING nal=4
 		echo kqswnal > /tmp/nal
 		;;
+
+	gm)
+		/sbin/insmod portals
+		/sbin/insmod kgmnal
+		/sbin/insmod ./$PING nal=3
+		echo kgmnal > /tmp/nal
+		;;
 	*)
-		echo "Usage : ${0} < tcp | toe | elan >"
+		echo "Usage : ${0} < tcp | toe | elan | gm>"
 		exit 1;
 esac
 ../utils/acceptor 9999&
diff --git a/lustre/portals/utils/.cvsignore b/lustre/portals/utils/.cvsignore
index 8e474ad..e2a0d44 100644
--- a/lustre/portals/utils/.cvsignore
+++ b/lustre/portals/utils/.cvsignore
@@ -6,4 +6,5 @@ ptlctl
 .deps
 routerstat
 wirecheck
+gmnalnid
 .*.cmd
diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am
index d51e3b3..c79909c 100644
--- a/lustre/portals/utils/Makefile.am
+++ b/lustre/portals/utils/Makefile.am
@@ -7,7 +7,7 @@
 COMPILE = $(CC) -Wall -g -I$(srcdir)/../include
 LINK = $(CC) -o $@
-sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck
+sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid
 lib_LIBRARIES = libptlctl.a
 acceptor_SOURCES = acceptor.c # -lefence
@@ -16,6 +16,8 @@ wirecheck_SOURCES = wirecheck.c
 libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
+gmnalnid_SOURCES = gmnalnid.c
+
 ptlctl_SOURCES = ptlctl.c
 ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence
 ptlctl_DEPENDENCIES = libptlctl.a
diff --git a/lustre/portals/utils/gmnalnid.c b/lustre/portals/utils/gmnalnid.c
new file mode 100644
index 0000000..701a814
--- /dev/null
+++ b/lustre/portals/utils/gmnalnid.c
@@ -0,0 +1,118 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
+ *
+ * This file is part of Lustre, http://www.lustre.org/
+ *
+ * This file is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+#define GMNAL_IOC_GET_GNID 1
+
+int
+roundup(int len)
+{
+        return((len+7) & (~0x7));
+}
+
+int main(int argc, char **argv)
+{
+        int rc, pfd;
+        struct portal_ioctl_data data;
+        unsigned int nid = 0, len;
+        char *name = NULL;
+        int c;
+
+
+
+        while ((c = getopt(argc, argv, "n:l")) != -1) {
+                switch(c) {
+                case('n'):
+                        name = optarg;
+                        break;
+                case('l'):
+                        printf("Get local id not implemented yet!\n");
+                        exit(-1);
+                default:
+                        printf("usage %s -n nodename [-p]\n", argv[0]);
+                }
+        }
+
+        if (!name) {
+                printf("usage %s -n nodename [-p]\n", argv[0]);
+                exit(-1);
+        }
+
+
+
+        PORTAL_IOC_INIT (data);
+
+        /*
+         * set up the inputs
+         */
+        len = strlen(name) + 1;
+        data.ioc_pbuf1 = malloc(len);
+        strcpy(data.ioc_pbuf1, name);
+        data.ioc_plen1 = len;
+
+        /*
+         * set up the outputs
+         */
+        data.ioc_pbuf2 = (void*)&nid;
+        data.ioc_plen2 = sizeof(unsigned int*);
+
+        pfd = open("/dev/portals", O_RDWR);
+        if ( pfd < 0 ) {
+                perror("opening portals device");
+                free(data.ioc_pbuf1);
+                exit(-1);
+        }
+
+        data.ioc_nal = GMNAL;
+        data.ioc_nal_cmd = GMNAL_IOC_GET_GNID;
+/*
+        data.ioc_len += data.ioc_inllen1;
+        data.ioc_len += data.ioc_plen1;
+*/
+        rc = ioctl (pfd, IOC_PORTAL_NAL_CMD, &data);
+        if (rc < 0)
+        {
+                perror ("Can't get my NID");
+        }
+
+        free(data.ioc_pbuf1);
+        close(pfd);
+        printf("%u\n", nid);
+        exit(nid);
+}
-- 
1.8.3.1
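The new gmnalnid utility is also a compact example of the portals NAL ioctl round trip: the node name goes down through ioc_pbuf1/ioc_plen1, the NID comes back through ioc_pbuf2/ioc_plen2, and the request is routed to the GM NAL on /dev/portals with ioc_nal = GMNAL and ioc_nal_cmd = GMNAL_IOC_GET_GNID. The sketch below factors that round trip into a reusable helper under two stated assumptions: the portals user-space headers (which declare struct portal_ioctl_data, PORTAL_IOC_INIT, GMNAL and IOC_PORTAL_NAL_CMD) are available, since their names are elided in the hunk above, and the output length is passed as sizeof(*nid) rather than sizeof(unsigned int *). It is an illustration, not part of the patch.

/* Sketch: the name-to-NID lookup from gmnalnid.c wrapped as a helper.
 * Assumes the portals user-space headers are on the include path; the
 * include below is left generic because the header names are elided
 * in the patch hunk above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
/* #include <portals/...>   portals ioctl definitions, elided above */

#define GMNAL_IOC_GET_GNID 1

/* Returns 0 and fills *nid on success, -1 on failure. */
static int gmnal_name_to_nid(const char *name, unsigned int *nid)
{
        struct portal_ioctl_data data;
        int pfd, rc;

        PORTAL_IOC_INIT(data);

        /* input: NUL-terminated node name */
        data.ioc_plen1 = strlen(name) + 1;
        data.ioc_pbuf1 = strdup(name);
        if (data.ioc_pbuf1 == NULL)
                return -1;

        /* output: the NID written back by the kernel NAL */
        data.ioc_pbuf2 = nid;
        data.ioc_plen2 = sizeof(*nid);

        data.ioc_nal = GMNAL;
        data.ioc_nal_cmd = GMNAL_IOC_GET_GNID;

        pfd = open("/dev/portals", O_RDWR);
        if (pfd < 0) {
                perror("opening portals device");
                free(data.ioc_pbuf1);
                return -1;
        }

        rc = ioctl(pfd, IOC_PORTAL_NAL_CMD, &data);
        if (rc < 0)
                perror("GMNAL_IOC_GET_GNID");

        close(pfd);
        free(data.ioc_pbuf1);
        return rc < 0 ? -1 : 0;
}

int main(int argc, char **argv)
{
        unsigned int nid;

        if (argc != 2) {
                fprintf(stderr, "usage: %s nodename\n", argv[0]);
                return 1;
        }
        if (gmnal_name_to_nid(argv[1], &nid) != 0)
                return 1;
        printf("%u\n", nid);        /* same decimal output as gmnalnid */
        return 0;
}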