From 90a5f51017ed10b713b8a927c34db0c50a013fa7 Mon Sep 17 00:00:00 2001 From: mdoyle Date: Fri, 1 Aug 2003 09:31:53 +0000 Subject: [PATCH] Portals NAL for Myrinet GM2 for Lustre (lgmnal) --- lnet/klnds/lgmlnd/Makefile.am | 13 + lnet/klnds/lgmlnd/Makefile.mk | 10 + lnet/klnds/lgmlnd/lgmnal.h | 451 +++++++++++++++ lnet/klnds/lgmlnd/lgmnal_api.c | 518 +++++++++++++++++ lnet/klnds/lgmlnd/lgmnal_cb.c | 248 ++++++++ lnet/klnds/lgmlnd/lgmnal_comm.c | 464 +++++++++++++++ lnet/klnds/lgmlnd/lgmnal_module.c | 127 +++++ lnet/klnds/lgmlnd/lgmnal_utils.c | 848 ++++++++++++++++++++++++++++ lustre/portals/knals/lgmnal/Makefile.am | 13 + lustre/portals/knals/lgmnal/Makefile.mk | 10 + lustre/portals/knals/lgmnal/lgmnal.h | 451 +++++++++++++++ lustre/portals/knals/lgmnal/lgmnal_api.c | 518 +++++++++++++++++ lustre/portals/knals/lgmnal/lgmnal_cb.c | 248 ++++++++ lustre/portals/knals/lgmnal/lgmnal_comm.c | 464 +++++++++++++++ lustre/portals/knals/lgmnal/lgmnal_module.c | 127 +++++ lustre/portals/knals/lgmnal/lgmnal_utils.c | 848 ++++++++++++++++++++++++++++ 16 files changed, 5358 insertions(+) create mode 100644 lnet/klnds/lgmlnd/Makefile.am create mode 100644 lnet/klnds/lgmlnd/Makefile.mk create mode 100644 lnet/klnds/lgmlnd/lgmnal.h create mode 100644 lnet/klnds/lgmlnd/lgmnal_api.c create mode 100644 lnet/klnds/lgmlnd/lgmnal_cb.c create mode 100644 lnet/klnds/lgmlnd/lgmnal_comm.c create mode 100644 lnet/klnds/lgmlnd/lgmnal_module.c create mode 100644 lnet/klnds/lgmlnd/lgmnal_utils.c create mode 100644 lustre/portals/knals/lgmnal/Makefile.am create mode 100644 lustre/portals/knals/lgmnal/Makefile.mk create mode 100644 lustre/portals/knals/lgmnal/lgmnal.h create mode 100644 lustre/portals/knals/lgmnal/lgmnal_api.c create mode 100644 lustre/portals/knals/lgmnal/lgmnal_cb.c create mode 100644 lustre/portals/knals/lgmnal/lgmnal_comm.c create mode 100644 lustre/portals/knals/lgmnal/lgmnal_module.c create mode 100644 lustre/portals/knals/lgmnal/lgmnal_utils.c diff --git a/lnet/klnds/lgmlnd/Makefile.am b/lnet/klnds/lgmlnd/Makefile.am new file mode 100644 index 0000000..6794494 --- /dev/null +++ b/lnet/klnds/lgmlnd/Makefile.am @@ -0,0 +1,13 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +include ../../Rules.linux + +MODULE = lgmnal +modulenet_DATA = lgmnal.o +EXTRA_PROGRAMS = lgmnal + +DEFS = +lgmnal_SOURCES = lgmnal.h lgmnal_api.c lgmnal_cb.c lgmnal_comm.c lgmnal_utils.c lgmnal_module.c diff --git a/lnet/klnds/lgmlnd/Makefile.mk b/lnet/klnds/lgmlnd/Makefile.mk new file mode 100644 index 0000000..c8ca67f --- /dev/null +++ b/lnet/klnds/lgmlnd/Makefile.mk @@ -0,0 +1,10 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +include ../../Kernelenv + +obj-y += lgmnal.o +lgmnal-objs := lgmnal_api.o lgmnal_cb.o lgmnal_utils.o lgmnal_comm.o lgmnal_module.o + diff --git a/lnet/klnds/lgmlnd/lgmnal.h b/lnet/klnds/lgmlnd/lgmnal.h new file mode 100644 index 0000000..1147078 --- /dev/null +++ b/lnet/klnds/lgmlnd/lgmnal.h @@ -0,0 +1,451 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. 
Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + +/* + * Portals GM kernel NAL header file + * This file makes all declaration and prototypes + * for the API side and CB side of the NAL + */ +#ifndef __INCLUDE_LGMNAL_H__ +#define __INCLUDE_LGMNAL_H__ + +#include "linux/config.h" +#include "linux/module.h" +#include "linux/tty.h" +#include "linux/kernel.h" +#include "linux/mm.h" +#include "linux/string.h" +#include "linux/stat.h" +#include "linux/errno.h" +#include "linux/locks.h" +#include "linux/unistd.h" +#include "linux/init.h" +#include "linux/sem.h" +#include "linux/vmalloc.h" +#ifdef MODVERSIONS +#include +#endif + + +#include "portals/nal.h" +#include "portals/api.h" +#include "portals/errno.h" +#include "linux/kp30.h" +#include "portals/p30.h" + +#include "portals/lib-nal.h" +#include "portals/lib-p30.h" + +#define GM_STRONG_TYPES 1 +#include "gm.h" +#include "gm_internal.h" + + +/* + * Defines for the API NAL + */ + + + +/* + * Small message size is configurable + * insmod can set small_msg_size + * which is used to populate nal_data.small_msg_size + */ +#define LGMNAL_SMALL_MESSAGE 1078 +#define LGMNAL_LARGE_MESSAGE_INIT 1079 +#define LGMNAL_LARGE_MESSAGE_ACK 1080 +#define LGMNAL_LARGE_MESSAGE_FINI 1081 + +extern int lgmnal_small_msg_size; +#define LGMNAL_SMALL_MSG_SIZE(a) a->small_msg_size +#define LGMNAL_IS_SMALL_MESSAGE(n,a,b,c) lgmnal_is_small_message(n, a, b, c) +#define LGMNAL_MAGIC 0x1234abcd + +typedef struct _lgmnal_hash { + void *key; + void *data; + struct _lgmnal_hash *next; + } lgmnal_hash_t; + +/* + * Small Transmit Descriptor + * A structre to keep track of a small transmit operation + * This structure has a one-to-one relationship with a small + * transmit buffer (both create by lgmnal_stxd_alloc). + * stxd has pointer to txbuffer and the hash table in nal_data + * allows us to go the other way. 
+ */ +typedef struct _lgmnal_stxd_t { + void *buffer; /* Address of small wired buffer this decriptor uses */ + int size; /* size (in bytes) of the tx buffer this descripto uses */ + gm_size_t gmsize; /* gmsize of the tx buffer this descripto uses */ + int type; /* large or small message */ + struct _lgmnal_data_t *nal_data; + lib_msg_t *cookie; /* the cookie the portals library gave us */ + int niov; + struct iovec iov[PTL_MD_MAX_IOV]; + struct _lgmnal_stxd_t *next; +} lgmnal_stxd_t; + +/* + * as for lgmnal_stxd_t + */ +typedef struct _lgmnal_srxd_t { + void *buffer; + int size; + gm_size_t gmsize; + int type; + struct _lgmnal_srxd_t *next; +} lgmnal_srxd_t; + +/* + * Header which lmgnal puts at the start of each message + */ +typedef struct _lgmnal_msghdr { + int magic; + int type; + unsigned int sender_node_id; + lgmnal_stxd_t *stxd; + } lgmnal_msghdr_t; +#define LGMNAL_MSGHDR_SIZE sizeof(lgmnal_msghdr_t) + +/* + * There's one of these for each interface that is initialised + * There's a maximum of LGMNAL_NUM_IF lgmnal_data_t + */ + +typedef struct _lgmnal_data_t { + int refcnt; +#ifdef LGMNAL_API_LOCK_SPIN + spinlock_t api_lock; /* lock provided for api->lock function */ +#else + struct semaphore api_lock; +#endif + spinlock_t cb_lock; /* lock provided for cb_cli function */ + char _cb_file[128]; + char _cb_function[128]; + int _cb_line; + spinlock_t stxd_lock; /* lock to add or remove stxd to/from free list */ + struct semaphore stxd_token; /* Don't try to access the list until get a token */ + lgmnal_stxd_t *stxd; /* list of free stxd's */ +#ifdef LGMNAL_USE_GM_HASH + struct gm_hash *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ +#else + lgmnal_hash_t *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ +#endif + spinlock_t srxd_lock; + struct semaphore srxd_token; + lgmnal_srxd_t *srxd; +#ifdef LGMNAL_USE_GM_HASH + struct gm_hash *srxd_hash; +#else + lgmnal_hash_t *srxd_hash; +#endif + nal_t *nal; /* our API NAL */ + nal_cb_t *nal_cb; /* our CB nal */ + struct gm_port *gm_port; /* the gm port structure we open in lgmnal_init */ + unsigned int gm_local_nid; /* our gm local node id */ + unsigned int gm_global_nid; /* our gm global node id */ + spinlock_t gm_lock; /* GM is not threadsage */ + long rxthread_pid; /* thread id of our receiver thread */ + int rxthread_flag; /* stop the thread flag */ + gm_alarm_t rxthread_alarm; /* used to wake sleeping rx thread */ + int small_msg_size; + int small_msg_gmsize; + char _file[128]; + char _function[128]; + int _line; +} lgmnal_data_t; + +/* + * For nal_data->rxthread_flag + */ +#define LGMNAL_THREAD_START 444 +#define LGMNAL_THREAD_STARTED 333 +#define LGMNAL_THREAD_CONTINUE 777 +#define LGMNAL_THREAD_STOP 666 +#define LGMNAL_THREAD_STOPPED 555 + +#define LGMNAL_NUM_IF 1 + +#if 0 +/* + * A global structre to maintain 1 nal_data structure for each + * myrinet card that the user initialises (only tested for 1) + * To add or remove any nal_data structures from the ifs arrary the + * init_lock must be acquired. 
This is the only time this lock is acquired + */ +typedef struct _lgmnal_global_t { + int debug_level; + struct semaphore init_lock; + lgmnal_data_t *ifs[LGMNAL_NUM_IF]; +} lgmnal_global_t; + +extern lgmnal_data_t global_nal_data; +#define LGMNAL_DEBUG_LEVEL lgmnal_global.debug_level +#else +extern lgmnal_data_t *global_nal_data; +extern int lgmnal_debug_level; +#define LGMNAL_DEBUG_LEVEL lgmnal_debug_level +#endif + +/* + * The gm_port to use for lgmnal + */ +#define LGMNAL_GM_PORT 4 + +/* + * for ioctl get pid + */ +#define LGMNAL_IOC_GET_GNID 1 + +/* + * LGMNAL_DEBUG_LEVEL set by module load 0= level) lgmnal_print args +#else +#define LGMNAL_PRINT(level, args) +#endif + +#define LGMNAL_DEBUG_ERR 1 /* only report errors */ +#define LGMNAL_DEBUG_TRACE 2 /* on entering function */ +#define LGMNAL_DEBUG_V 3 /* debug */ +#define LGMNAL_DEBUG_VV 4 /* more debug */ + +/* + * Return codes + */ +#define LGMNAL_STATUS_OK 0 +#define LGMNAL_STATUS_FAIL 1 +#define LGMNAL_STATUS_NOMEM 2 + + +/* + * FUNCTION PROTOTYPES + */ + +/* + * Locking macros + */ + +/* + * To access the global structure + * to add or remove interface (lgmnal_init) or shutdown only + */ +#define LGMNAL_GLOBAL_LOCK_INIT sema_init(&(lgmnal_global.init_lock), 1) +#define LGMNAL_GLOBAL_LOCK do { \ + LGMNAL_PRINT(1, ("Acquiring global mutex\n")); \ + down(&(lgmnal_global.init_lock)); \ + LGMNAL_PRINT(1, ("Got global lock\n")); \ + } while (0) +#define LGMNAL_GLOBAL_UNLOCK do { \ + LGMNAL_PRINT(1, ("Releasing global mutex\n")); \ + up(&(lgmnal_global.init_lock)); \ + LGMNAL_PRINT(1, ("Release global mutex\n")); \ + } while (0) + +/* + * For the API lock function + */ +#ifdef LGMNAL_API_LOCK_SPIN +#define LGMNAL_API_LOCK_INIT(a) spin_lock_init(&a->api_lock) +#define LGMNAL_API_LOCK(a) spin_lock(&a->api_lock) +#define LGMNAL_API_UNLOCK(a) spin_unlock(&a->api_lock) +#else +#define LGMNAL_API_LOCK_INIT(a) sema_init(&a->api_lock, 1) +#define LGMNAL_API_LOCK(a) down(&a->api_lock) +#define LGMNAL_API_UNLOCK(a) up(&a->api_lock) +#endif + +/* + * For the Small tx and rx descriptor lists + */ +#define LGMNAL_TXD_LOCK_INIT(a) spin_lock_init(&a->stxd_lock); +#define LGMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); +#define LGMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); +#define LGMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); +#define LGMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); +#define LGMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) +#define LGMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); + + +#define LGMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); +#define LGMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); +#define LGMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); +#define LGMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); +#define LGMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); +#define LGMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) +#define LGMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); + +#define LGMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); +#define LGMNAL_GM_LOCK(a) do { \ + while (!spin_trylock(&a->gm_lock)) { \ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("waiting %s:%s:%d holder %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__, nal_data->_function, nal_data->_file, nal_data->_line)); \ + lgmnal_yield(128); \ + } \ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Locked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ + sprintf(nal_data->_function, "%s", __FUNCTION__); \ + sprintf(nal_data->_file, "%s", __FILE__); \ + nal_data->_line = __LINE__; \ + } while (0) +#define LGMNAL_GM_UNLOCK(a) do { \ + spin_unlock(&a->gm_lock); \ + 
memset(nal_data->_function, 0, 128); \ + memset(nal_data->_file, 0, 128); \ + nal_data->_line = 0; \ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Unlocked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ + } while(0); + +#define LGMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); + + +/* + * API NAL + */ +int lgmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); + +int lgmnal_api_shutdown(nal_t *, int); + +int lgmnal_api_validate(nal_t *, void *, size_t); + +void lgmnal_api_yield(nal_t *); + +void lgmnal_api_lock(nal_t *, unsigned long *); + +void lgmnal_api_unlock(nal_t *, unsigned long *); + + +#define LGMNAL_INIT_NAL(a) do { \ + a->forward = lgmnal_api_forward; \ + a->shutdown = lgmnal_api_shutdown; \ + a->validate = NULL; \ + a->yield = lgmnal_api_yield; \ + a->lock = lgmnal_api_lock; \ + a->unlock = lgmnal_api_unlock; \ + a->timeout = NULL; \ + a->refct = 1; \ + a->nal_data = NULL; \ + } while (0) + + +/* + * CB NAL + */ + +int lgmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); + +int lgmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); + +int lgmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, + unsigned int, struct iovec *, size_t, size_t); + +int lgmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, + unsigned int, ptl_kiov_t *, size_t, size_t); + +int lgmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); + +int lgmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); + +int lgmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); + +void *lgmnal_cb_malloc(nal_cb_t *, size_t); + +void lgmnal_cb_free(nal_cb_t *, void *, size_t); + +void lgmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); + +int lgmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); + +void lgmnal_cb_printf(nal_cb_t *, const char *fmt, ...); + +void lgmnal_cb_cli(nal_cb_t *, unsigned long *); + +void lgmnal_cb_sti(nal_cb_t *, unsigned long *); + +int lgmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); + +nal_t *lgmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); + +void lgmnal_fini(void); + + + +#define LGMNAL_INIT_NAL_CB(a) do { \ + a->cb_send = lgmnal_cb_send; \ + a->cb_send_pages = lgmnal_cb_send_pages; \ + a->cb_recv = lgmnal_cb_recv; \ + a->cb_recv_pages = lgmnal_cb_recv_pages; \ + a->cb_read = lgmnal_cb_read; \ + a->cb_write = lgmnal_cb_write; \ + a->cb_callback = lgmnal_cb_callback; \ + a->cb_malloc = lgmnal_cb_malloc; \ + a->cb_free = lgmnal_cb_free; \ + a->cb_map = NULL; \ + a->cb_unmap = NULL; \ + a->cb_printf = lgmnal_cb_printf; \ + a->cb_cli = lgmnal_cb_cli; \ + a->cb_sti = lgmnal_cb_sti; \ + a->cb_dist = lgmnal_cb_dist; \ + a->nal_data = NULL; \ + } while (0) + +/* + * lgmnal utilities + */ + +void lgmnal_print(const char *, ...); + +/* + * Small Transmit and Receive Descriptor Functions + */ +int lgmnal_alloc_stxd(lgmnal_data_t *); +void lgmnal_free_stxd(lgmnal_data_t *); +lgmnal_stxd_t* lgmnal_get_stxd(lgmnal_data_t *, int); +void lgmnal_return_stxd(lgmnal_data_t *, lgmnal_stxd_t *); + +int lgmnal_alloc_srxd(lgmnal_data_t *); +void lgmnal_free_srxd(lgmnal_data_t *); +lgmnal_srxd_t* lgmnal_get_srxd(lgmnal_data_t *, int); +void lgmnal_return_srxd(lgmnal_data_t *, lgmnal_srxd_t *); + +/* + * general utility functions + */ +lgmnal_srxd_t *lgmnal_rxbuffer_to_srxd(lgmnal_data_t *, void*); +lgmnal_stxd_t *lgmnal_txbuffer_to_stxd(lgmnal_data_t *, void*); +void 
lgmnal_stop_rxthread(lgmnal_data_t *); +void lgmnal_small_tx_done(gm_port_t *, void *, gm_status_t); +char *lgmnal_gm_error(gm_status_t); +char *lgmnal_rxevent(gm_recv_event_t*); +int lgmnal_is_small_message(lgmnal_data_t*, int, struct iovec*, int); + +void *lgmnal_hash_find(lgmnal_hash_t *, void*); +int lgmnal_hash_add(lgmnal_hash_t**, void*, void*); +void lgmnal_hash_free(lgmnal_hash_t**); + +/* + * Communication functions + */ +int lgmnal_receive_thread(void *); +int +lgmnal_small_transmit(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec*, int); + +int +lgmnal_small_receive2(nal_cb_t *, void *, lib_msg_t *, unsigned int, struct iovec *, size_t, size_t); + +void lgmnal_yield(int); + +#endif /*__INCLUDE_LGMNAL_H__*/ diff --git a/lnet/klnds/lgmlnd/lgmnal_api.c b/lnet/klnds/lgmlnd/lgmnal_api.c new file mode 100644 index 0000000..8e774bf --- /dev/null +++ b/lnet/klnds/lgmlnd/lgmnal_api.c @@ -0,0 +1,518 @@ + +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + + + +/* + * Implements the API NAL functions + */ + +#include "lgmnal.h" + +lgmnal_data_t *global_nal_data = NULL; +/* + * lgmnal_api_forward + * This function takes a pack block of arguments from the NAL API + * module and passes them to the NAL CB module. The CB module unpacks + * the args and calls the appropriate function indicated by index. + * Typically this function is used to pass args between kernel and use + * space. 
+ * As lgmanl exists entirely in kernel, just pass the arg block directly to + * the NAL CB, buy passing the args to lib_dispatch + * Arguments are + * nal_t nal Our nal + * int index the api function that initiated this call + * void *args packed block of function args + * size_t arg_len length of args block + * void *ret A return value for the API NAL + * size_t ret_len Size of the return value + * + */ + +int +lgmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, + void *ret, size_t ret_len) +{ + + nal_cb_t *nal_cb = NULL; + lgmnal_data_t *nal_data = NULL; + + + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_forward: nal [%p], index [%d], args [%p], arglen [%d], ret [%p], retlen [%d]\n", nal, index, args, arg_len, ret, ret_len)); + + if (!nal || !args || (index < 0) || (arg_len < 0)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); +#ifdef LGMNAL_DEBUG + if (!nal) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No nal specified\n")); + if (!args) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No args specified\n")); + if (index < 0) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Index is negative[%d]\n", index)); + if (arg_len < 0) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("arg_len is negative [%d]\n", arg_len)); +#endif + return (PTL_FAIL); + } + + if (ret && (ret_len <= 0)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); +#ifdef LGMNAL_DEBUG + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("ret_len is [%d]\n", ret_len)); +#endif + return (PTL_FAIL); + } + + + if (!nal->nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal, no nal data\n")); + return (PTL_FAIL); + } + + nal_data = nal->nal_data; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_data is [%p]\n", nal_data)); + + if (!nal_data->nal_cb) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal_data, no nal_cb\n")); + return (PTL_FAIL); + } + + nal_cb = nal_data->nal_cb; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_cb is [%p]\n", nal_cb)); + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward calling lib_dispatch\n")); + lib_dispatch(nal_cb, NULL, index, args, ret); + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward returns from lib_dispatch\n")); + + return(PTL_OK); +} + + +/* + * lgmnal_api_shutdown + * Close down this interface and free any resources associated with it + * nal_t nal our nal to shutdown + */ +int +lgmnal_api_shutdown(nal_t *nal, int interface) +{ + + lgmnal_data_t *nal_data = nal->nal_data; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_shutdown: nal_data [%p]\n", nal_data)); + + /* + * TO DO lgmnal_api_shutdown what is to be done? 
+ */ + + return(PTL_OK); +} + + +/* + * lgmnal_api_validate + * validate a user address for use in communications + * There's nothing to be done here + */ +int +lgmnal_api_validate(nal_t *nal, void *base, size_t extent) +{ + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_validate : nal [%p], base [%p], extent [%d]\n", nal, base, extent)); + + return(PTL_OK); +} + + + +/* + * lgmnal_api_yield + * Give up the processor + */ +void +lgmnal_api_yield(nal_t *nal) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_yield : nal [%p]\n", nal)); + + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + + return; +} + + + +/* + * lgmnal_api_lock + * Take a threadsafe lock + */ +void +lgmnal_api_lock(nal_t *nal, unsigned long *flags) +{ + + lgmnal_data_t *nal_data; + nal_cb_t *nal_cb; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flagsa [%p] flags[%ul]\n", nal, flags, *flags)); + + nal_data = nal->nal_data; + nal_cb = nal_data->nal_cb; + + nal_cb->cb_cli(nal_cb, flags); +/* + LGMNAL_API_LOCK(nal_data); +*/ + + return; +} + +/* + * lgmnal_api_unlock + * Release a threadsafe lock + */ +void +lgmnal_api_unlock(nal_t *nal, unsigned long *flags) +{ + lgmnal_data_t *nal_data; + nal_cb_t *nal_cb; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flags [%p]\n", nal, flags)); + + nal_data = nal->nal_data; + if (!nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal, no nal_data\n")); + } + nal_cb = nal_data->nal_cb; + if (!nal_cb) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal_data, no nal_cb\n")); + } + + nal_cb->cb_sti(nal_cb, flags); +/* + LGMNAL_API_UNLOCK(nal_data); +*/ + + return; +} + + +nal_t * +lgmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t rpid) +{ + + nal_t *nal = NULL; + nal_cb_t *nal_cb = NULL; + lgmnal_data_t *nal_data = NULL; + lgmnal_srxd_t *srxd = NULL; + gm_status_t gm_status; + unsigned int local_nid = 0, global_nid = 0; + ptl_nid_t portals_nid; + ptl_pid_t portals_pid = 0; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_init : interface [%d], ptl_size [%d], ac_size[%d]\n", + interface, ptl_size, ac_size)); + + if ((interface < 0) || (interface > LGMNAL_NUM_IF) || (ptl_size <= 0) || (ac_size <= 0) ) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad args\n")); + return(NULL); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("parameters check out ok\n")); + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Acquired global lock\n")); + + + PORTAL_ALLOC(nal_data, sizeof(lgmnal_data_t)); + if (!nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't get memory\n")); + return(NULL); + } + memset(nal_data, 0, sizeof(lgmnal_data_t)); + /* + * set the small message buffer size + */ + nal_data->refcnt = 1; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_data[%p]\n", nal_data)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("small_msg_size is [%d]\n", nal_data->small_msg_size)); + + PORTAL_ALLOC(nal, sizeof(nal_t)); + if (!nal) { + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + return(NULL); + } + memset(nal, 0, sizeof(nal_t)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal[%p]\n", nal)); + + PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); + if (!nal_cb) { + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + return(NULL); + } + memset(nal_cb, 0, sizeof(nal_cb_t)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_cb[%p]\n", nal_cb)); + + LGMNAL_INIT_NAL(nal); + LGMNAL_INIT_NAL_CB(nal_cb); + /* + * String them all together + */ + nal->nal_data = (void*)nal_data; + nal_cb->nal_data = 
(void*)nal_data; + nal_data->nal = nal; + nal_data->nal_cb = nal_cb; + + LGMNAL_API_LOCK_INIT(nal_data); + LGMNAL_CB_LOCK_INIT(nal_data); + LGMNAL_GM_LOCK_INIT(nal_data); + + + /* + * initialise the interface, + */ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling gm_init\n")); + if (gm_init() != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("call to gm_init failed\n")); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_open with interface [%d], port [%d], name [%s], version [%d]\n", interface, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION)); + + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_open(&nal_data->gm_port, 0, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION); + LGMNAL_GM_UNLOCK(nal_data); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open returned [%d]\n", gm_status)); + if (gm_status == GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open succeeded port[%p]\n", nal_data->gm_port)); + } else { + switch(gm_status) { + case(GM_INVALID_PARAMETER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Invalid Parameter\n")); + break; + case(GM_BUSY): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. GM Busy\n")); + break; + case(GM_NO_SUCH_DEVICE): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. No such device\n")); + break; + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Incompatile lib and driver\n")); + break; + case(GM_OUT_OF_MEMORY): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Out of Memory\n")); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Unknow error code [%d]\n", gm_status)); + break; + } + LGMNAL_GM_LOCK(nal_data); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + nal_data->small_msg_size = lgmnal_small_msg_size; + nal_data->small_msg_gmsize = gm_min_size_for_length(lgmnal_small_msg_size); + + if (lgmnal_alloc_srxd(nal_data) != LGMNAL_STATUS_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small rx descriptors\n")); + lgmnal_free_stxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + /* + * Hang out a bunch of small receive buffers + * In fact hang them all out + */ + while((srxd = lgmnal_get_srxd(nal_data, 0))) { + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("giving [%p] to gm_provide_recvive_buffer\n", srxd->buffer)); + LGMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0); + LGMNAL_GM_UNLOCK(nal_data); + } + + /* + * Allocate pools of small tx buffers and descriptors + */ + if (lgmnal_alloc_stxd(nal_data) != LGMNAL_STATUS_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small tx descriptors\n")); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + /* + * Start the recieve thread + * Initialise the gm_alarm we will use to wake the thread is + * it needs to be stopped + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Initializing receive thread alarm 
and flag\n")); + gm_initialize_alarm(&nal_data->rxthread_alarm); + nal_data->rxthread_flag = LGMNAL_THREAD_START; + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Starting receive thread\n")); + nal_data->rxthread_pid = kernel_thread(lgmnal_receive_thread, (void*)nal_data, 0); + if (nal_data->rxthread_pid <= 0) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Receive thread failed to start\n")); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + while (nal_data->rxthread_flag != LGMNAL_THREAD_STARTED) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1024); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Waiting for receive thread signs of life\n")); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("receive thread seems to have started\n")); + nal_data->rxthread_flag = LGMNAL_THREAD_CONTINUE; + + + + /* + * Initialise the portals library + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Getting node id\n")); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + lgmnal_stop_rxthread(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't determine node id\n")); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + nal_data->gm_local_nid = local_nid; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%u]\n", local_nid)); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, &global_nid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to obtain global id\n")); + lgmnal_stop_rxthread(nal_data); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node id is [%u][%x]\n", global_nid)); + nal_data->gm_global_nid = global_nid; + +/* + pid = gm_getpid(); +*/ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_pid is [%u]\n", portals_pid)); + portals_nid = (unsigned long)global_nid; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_nid is [%lu]\n", portals_nid)); + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_init\n")); + if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, ac_size) != PTL_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lib_init failed\n")); + lgmnal_stop_rxthread(nal_data); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_init finished\n")); + global_nal_data = nal->nal_data; + return(nal); +} + + + +/* + * Called when module removed + */ +void lgmnal_fini() +{ + lgmnal_data_t *nal_data = global_nal_data; + nal_t *nal = nal_data->nal; + nal_cb_t 
*nal_cb = nal_data->nal_cb; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_fini\n")); + + PtlNIFini(lgmnal_ni); + lib_fini(nal_cb); + + lgmnal_stop_rxthread(nal_data); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); +} + +EXPORT_SYMBOL(lgmnal_init); +EXPORT_SYMBOL(lgmnal_fini); +EXPORT_SYMBOL(lgmnal_api_forward); +EXPORT_SYMBOL(lgmnal_api_validate); +EXPORT_SYMBOL(lgmnal_api_yield); +EXPORT_SYMBOL(lgmnal_api_lock); +EXPORT_SYMBOL(lgmnal_api_unlock); +EXPORT_SYMBOL(lgmnal_api_shutdown); diff --git a/lnet/klnds/lgmlnd/lgmnal_cb.c b/lnet/klnds/lgmlnd/lgmnal_cb.c new file mode 100644 index 0000000..bb231af --- /dev/null +++ b/lnet/klnds/lgmlnd/lgmnal_cb.c @@ -0,0 +1,248 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + + +/* + * This file implements the nal cb functions + */ + + +#include "lgmnal.h" + +int lgmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) +{ + lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; + int status = PTL_OK; + lgmnal_data_t *nal_data = nal_cb->nal_data; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv nal_cb [%p],private[%p], cookie[%p], niov[%d], iov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, niov, iov, mlen, rlen)); + + if (srxd->type == LGMNAL_SMALL_MESSAGE) { + if (!LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, mlen)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_cb_recv. 
This is not a small message\n")); + } + status = lgmnal_small_receive2(nal_cb, private, cookie, niov, iov, mlen, rlen); + } + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_return status [%d]\n", status)); + return(status); +} + +int lgmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, size_t rlen) +{ + lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; + int status = PTL_OK; + struct iovec *iovec = NULL; + int i = 0; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv_pages nal_cb [%p],private[%p], cookie[%p], kniov[%d], kiov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, kniov, kiov, mlen, rlen)); + + if (srxd->type == LGMNAL_SMALL_MESSAGE) { + PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov); + if (!iovec) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't malloc\n")); + return(LGMNAL_STATUS_FAIL); + } + + /* + * map each page and create an iovec for it + */ + for (i=0; ikiov_page, kiov->kiov_len, kiov->kiov_offset)); + iovec->iov_len = kiov->kiov_len; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kmap", kiov->kiov_page)); + iovec->iov_base = kmap(kiov->kiov_page) + kiov->kiov_offset; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling iov_base is [%p]", iovec->iov_base)); + iovec->iov_len = kiov->kiov_len; + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("calling lgmnal_small_receive2\n")); + status = lgmnal_small_receive2(nal_cb, private, cookie, kniov, iovec, mlen, rlen); + PORTAL_FREE(iovec, sizeof(struct iovec)*kniov); + } + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_return status [%d]\n", status)); + return(status); +} + + +int lgmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t len) +{ + + lgmnal_data_t *nal_data; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sendnid [%lu] niov[%d] len[%d]\n", nid, niov, len)); + nal_data = nal_cb->nal_data; + + if (LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n")); + lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send it is not supported yet\n")); +/* + lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len); +*/ + return(LGMNAL_STATUS_FAIL); + } + return(PTL_OK); +} + +int lgmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, + int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len) +{ + + int i = 0; + lgmnal_data_t *nal_data; + struct iovec *iovec; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_send_pages nid [%lu] niov[%d] len[%d]\n", nid, kniov, len)); + nal_data = nal_cb->nal_data; + if (LGMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) { + /* TO DO fix small message for send pages */ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n")); + PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec)); + + for (i=0; ikiov_page, kiov->kiov_len, kiov->kiov_offset)); + iovec->iov_len = kiov->kiov_len; + iovec->iov_base = kmap(kiov->kiov_page) + kiov->kiov_offset; + iovec->iov_len = kiov->kiov_len; + } + lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, kniov, iovec, len); + PORTAL_FREE(iovec, kniov*sizeof(struct iovec)); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send it is not supported yet\n")); +/* + lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, 
pid, niov, iov, len); +*/ + return(LGMNAL_STATUS_FAIL); + } + return(PTL_OK); +} + +int lgmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, user_ptr src, size_t len) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_read dst [%p] src [%p] len[%d]\n", dst, src, len)); + gm_bcopy(src, dst, len); + return(PTL_OK); +} + +int lgmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, void *src, size_t len) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_write :: dst [%p] src [%p] len[%d]\n", dst, src, len)); + gm_bcopy(src, dst, len); + return(PTL_OK); +} + +int lgmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_callback nal_cb[%p], private[%p], eq[%p], ev[%p]\n", nal_cb, private, eq, ev)); + + if (eq->event_callback != NULL) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("found callback\n")); + eq->event_callback(ev); + } + + return(PTL_OK); +} + +void *lgmnal_cb_malloc(nal_cb_t *nal_cb, size_t len) +{ + void *ptr = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_malloc len[%d]\n", len)); + PORTAL_ALLOC(ptr, len); + return(ptr); +} + +void lgmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_free :: buf[%p] len[%d]\n", buf, len)); + PORTAL_FREE(buf, len); + return; +} + +void lgmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void **addrkey) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_unmap niov[%d] iov[%], addrkey[%p]\n", niov, iov, addrkey)); + return; +} + +int lgmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void**addrkey) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_map niov[%d], iov[%p], addrkey[%p], niov, iov, addrkey\n")); + return(PTL_OK); +} + +void lgmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...) 
+{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_printf\n")); + lgmnal_print(fmt); + return; +} + +void lgmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) +{ + lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; + spinlock_t cb_lock = nal_data->cb_lock; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_cli\n")); +/* + local_irq_save(*flags); + spin_lock_irqsave(&cb_lock, *flags); +*/ + spin_lock(&cb_lock); + return; +} + +void lgmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) +{ + lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; + spinlock_t cb_lock = nal_data->cb_lock; + +/* + local_irq_restore(*flags); + spin_unlock_irqrestore(&cb_lock, *flags); +*/ + spin_unlock(&cb_lock); + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sti\n")); + return; +} + +int lgmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_dist\n")); + if (dist) + *dist = 27; + return(PTL_OK); +} + + + + +EXPORT_SYMBOL(lgmnal_cb_send); +EXPORT_SYMBOL(lgmnal_cb_send_pages); +EXPORT_SYMBOL(lgmnal_cb_recv); +EXPORT_SYMBOL(lgmnal_cb_recv_pages); +EXPORT_SYMBOL(lgmnal_cb_read); +EXPORT_SYMBOL(lgmnal_cb_write); +EXPORT_SYMBOL(lgmnal_cb_cli); +EXPORT_SYMBOL(lgmnal_cb_sti); +EXPORT_SYMBOL(lgmnal_cb_dist); +EXPORT_SYMBOL(lgmnal_cb_printf); +EXPORT_SYMBOL(lgmnal_cb_map); +EXPORT_SYMBOL(lgmnal_cb_unmap); +EXPORT_SYMBOL(lgmnal_cb_callback); +EXPORT_SYMBOL(lgmnal_cb_free); +EXPORT_SYMBOL(lgmnal_cb_malloc); diff --git a/lnet/klnds/lgmlnd/lgmnal_comm.c b/lnet/klnds/lgmlnd/lgmnal_comm.c new file mode 100644 index 0000000..091f665 --- /dev/null +++ b/lnet/klnds/lgmlnd/lgmnal_comm.c @@ -0,0 +1,464 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + +/* + * This file contains all lgmnal send and receive functions + */ + +#include "lgmnal.h" + +int +lgmnal_requeue_rxbuffer(lgmnal_data_t *nal_data, lgmnal_srxd_t *srxd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_requeue_rxbuffer\n")); + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data)); + + LGMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0 ); + LGMNAL_GM_UNLOCK(nal_data); + + return(LGMNAL_STATUS_OK); +} + + +/* + * Handle a bad message + * A bad message is one we don't expect or can't interpret + */ +int +lgmnal_badrx_message(lgmnal_data_t *nal_data, gm_recv_t *recv, lgmnal_srxd_t *srxd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Can't handle message\n")); + + if (!srxd) + srxd = lgmnal_rxbuffer_to_srxd(nal_data, gm_ntohp(recv->buffer)); + if (srxd) { + lgmnal_requeue_rxbuffer(nal_data, srxd); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't find a descriptor for this buffer\n")); + /* + * get rid of it ? + */ + return(LGMNAL_STATUS_FAIL); + } + + return(LGMNAL_STATUS_OK); +} + + +/* + * Start processing a small message receive + * Get here from lgmnal_receive_thread + * Hand off to lib_parse, which calls cb_recv + * which hands back to lgmnal_small_receive2 + * Deal with all endian stuff here (if we can!) 
+ */ +int +lgmnal_small_receive1(lgmnal_data_t *nal_data, gm_recv_t *recv) +{ + lgmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + unsigned int snode, sport, type, length; + lgmnal_msghdr_t *lgmnal_msghdr; + ptl_hdr_t *portals_hdr; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive1 nal_data [%p], recv [%p]\n", nal_data, recv)); + + buffer = gm_ntohp(recv->buffer);; + snode = (int)gm_ntoh_u16(recv->sender_node_id); + sport = (int)gm_ntoh_u8(recv->sender_port_id); + type = (int)gm_ntoh_u8(recv->type); + buffer = gm_ntohp(recv->buffer); + length = (int) gm_ntohl(recv->length); + + lgmnal_msghdr = (lgmnal_msghdr_t*)buffer; + portals_hdr = (ptl_hdr_t*)(buffer+LGMNAL_MSGHDR_SIZE); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("rx_event:: Sender node [%d], Sender Port [%d], type [%d], length [%d], buffer [%p]\n", + snode, sport, type, length, buffer)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_msghdr:: Sender node [%u], magic [%lx], type [%d]\n", + lgmnal_msghdr->sender_node_id, lgmnal_msghdr->magic, lgmnal_msghdr->type)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: Sender node [%ul], dest_node [%ul]\n", + portals_hdr->src_nid, portals_hdr->dest_nid)); + + + /* + * Get a transmit descriptor for this message + */ + srxd = lgmnal_rxbuffer_to_srxd(nal_data, buffer); + LGMNAL_PRINT(LGMNAL_DEBUG, ("Back from lgmnal_rxbuffer_to_srxd\n")); + if (!srxd) { + LGMNAL_PRINT(LGMNAL_DEBUG, ("Failed to get receive descriptor for this buffer\n")); + lib_parse(nal_data->nal_cb, portals_hdr, srxd); + return(LGMNAL_STATUS_FAIL); + } + srxd->type = LGMNAL_SMALL_MESSAGE; + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling lib_parse buffer is [%p]\n", buffer+LGMNAL_MSGHDR_SIZE)); + /* + * control passes to lib, which calls cb_recv + * cb_recv is responsible for returning the buffer + * for future receive + */ + lib_parse(nal_data->nal_cb, portals_hdr, srxd); + + return(LGMNAL_STATUS_OK); +} + +/* + * Get here from lgmnal_receive_thread, lgmnal_small_receive1 + * lib_parse, cb_recv + * Put data from prewired receive buffer into users buffer(s) + * Hang out the receive buffer again for another receive + * Call lib_finalize + */ +int +lgmnal_small_receive2(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int niov, + struct iovec *iov, size_t mlen, size_t rlen) +{ + lgmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive2 niov [%d] mlen[%d]\n", niov, mlen)); + + if (!private) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_small_receive2 no context\n")); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + } + + srxd = (lgmnal_srxd_t*)private; + buffer = srxd->buffer; + buffer += sizeof(lgmnal_msghdr_t); + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing [%p] len [%d]\n", iov, iov->iov_len)); + gm_bcopy(buffer, iov->iov_base, iov->iov_len); + buffer += iov->iov_len; + iov++; + } + + + /* + * let portals library know receive is complete + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_finalize\n")); + if (lib_finalize(nal_cb, private, cookie) != PTL_OK) { + /* TO DO what to do with failed lib_finalise? 
*/ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lib_finalize failed\n")); + } + /* + * return buffer so it can be used again + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling gm_provide_receive_buffer\n")); + LGMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, srxd->gmsize, GM_LOW_PRIORITY, 0); + LGMNAL_GM_UNLOCK(nal_data); + + return(PTL_OK); +} + + + +/* + * The recevive thread + * This guy wait in gm_blocking_recvive and gets + * woken up when the myrinet adaptor gets an interrupt. + * Hands off processing of small messages and blocks again + */ +int +lgmnal_receive_thread(void *arg) +{ + lgmnal_data_t *nal_data; + gm_recv_event_t *rxevent = NULL; + gm_recv_t *recv = NULL; + void *buffer; + + if (!arg) { + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread. NO nal_data. Exiting\n", arg)); + return(-1); + } + + nal_data = (lgmnal_data_t*)arg; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread nal_data is [%p]\n", arg)); + + nal_data->rxthread_flag = LGMNAL_THREAD_STARTED; + while (nal_data->rxthread_flag == LGMNAL_THREAD_STARTED) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: lgmnal_receive_threads waiting for LGMNAL_CONTINUE flag\n")); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1024); + + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: calling daemonize\n")); + daemonize(); + LGMNAL_GM_LOCK(nal_data); + while(nal_data->rxthread_flag == LGMNAL_THREAD_CONTINUE) { + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Receive thread waiting\n")); + rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: receive thread got [%s]\n", lgmnal_rxevent(rxevent))); + if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Receive thread time to exit\n")); + break; + } + switch (GM_RECV_EVENT_TYPE(rxevent)) { + + case(GM_RECV_EVENT): + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: GM_RECV_EVENT\n")); + recv = (gm_recv_t*)&(rxevent->recv); + buffer = gm_ntohp(recv->buffer); + if (((lgmnal_msghdr_t*)buffer)->type == LGMNAL_SMALL_MESSAGE) { + LGMNAL_GM_UNLOCK(nal_data); + lgmnal_small_receive1(nal_data, recv); + LGMNAL_GM_LOCK(nal_data); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: Unsupported message type\n")); + lgmnal_badrx_message(nal_data, recv, NULL); + } + break; + case(_GM_SLEEP_EVENT): + /* + * Blocking receive above just returns + * immediatly with _GM_SLEEP_EVENT + * Don't know what this is + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Sleeping in gm_unknown\n")); + LGMNAL_GM_UNLOCK(nal_data); + gm_unknown(nal_data->gm_port, rxevent); + LGMNAL_GM_LOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Awake from gm_unknown\n")); + break; + + default: + /* + * Don't know what this is + * gm_unknown will make sense of it + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Passing event to gm_unknown\n")); + gm_unknown(nal_data->gm_port, rxevent); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Processed unknown event\n")); + + } + + + } + LGMNAL_GM_UNLOCK(nal_data); + nal_data->rxthread_flag = LGMNAL_THREAD_STOPPED; + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: The lgmnal_receive_thread nal_data [%p] is exiting\n", nal_data)); + return(LGMNAL_STATUS_OK); +} + + +int +lgmnal_small_transmit(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, + ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) +{ + lgmnal_data_t *nal_data = 
(lgmnal_data_t*)nal_cb->nal_data; + lgmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + lgmnal_msghdr_t *msghdr = NULL; + int tot_size = 0; + unsigned int local_nid; + gm_status_t gm_status = GM_SUCCESS; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_transmit nal_cb [%p] private [%p] cookie [%p] hdr [%p] type [%d] global_nid [%u][%x] pid [%d] niov [%d] iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, global_nid, global_nid, pid, niov, iov, size)); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: dest_nid [%lu], src_nid [%lu]\n", hdr->dest_nid, hdr->src_nid)); + + if (!nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_data\n")); + return(LGMNAL_STATUS_FAIL); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("nal_data [%p]\n", nal_data)); + } + + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, &local_nid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to obtain local id\n")); + return(LGMNAL_STATUS_FAIL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local Node_id is [%u][%x]\n", local_nid, local_nid)); + + stxd = lgmnal_get_stxd(nal_data, 1); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); + + stxd->type = LGMNAL_SMALL_MESSAGE; + stxd->cookie = cookie; + + /* + * Copy lgmnal_msg_hdr and portals header to the transmit buffer + * Then copy the data in + */ + buffer = stxd->buffer; + msghdr = (lgmnal_msghdr_t*)buffer; + + msghdr->magic = LGMNAL_MAGIC; + msghdr->type = LGMNAL_SMALL_MESSAGE; + msghdr->sender_node_id = nal_data->gm_global_nid; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); + + buffer += sizeof(lgmnal_msghdr_t); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Advancing buffer pointer by [%x] to [%p]\n", sizeof(lgmnal_msghdr_t), buffer)); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing iov [%p] len [%d] to [%p]\n", iov, iov->iov_len, buffer)); + gm_bcopy(iov->iov_base, buffer, iov->iov_len); + buffer+= iov->iov_len; + iov++; + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending\n")); + tot_size = size+sizeof(ptl_hdr_t)+sizeof(lgmnal_msghdr_t); + + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_send_to_peer port [%p] buffer [%p] gmsize [%d] msize [%d] global_nid [%u][%x] local_nid[%d] stxd [%p]\n", + nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, global_nid, global_nid, local_nid, stxd)); + LGMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, GM_LOW_PRIORITY, local_nid, lgmnal_small_tx_done, (void*)stxd); + + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); + + return(PTL_OK); +} + + +void +lgmnal_small_tx_done(gm_port_t *gm_port, void *context, gm_status_t status) +{ + lgmnal_stxd_t *stxd = (lgmnal_stxd_t*)context; + lib_msg_t *cookie = stxd->cookie; + lgmnal_data_t *nal_data = (lgmnal_data_t*)stxd->nal_data; + nal_cb_t *nal_cb = nal_data->nal_cb; + + if (!stxd) { + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("send completion event for unknown stxd\n")); + return; + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Result of send stxd [%p] is [%s]\n", stxd, lgmnal_gm_error(status))); + /* TO DO figure out which sends are worth retrying and get a send token to retry */ + if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Call to lib_finalize failed for stxd [%p]\n", stxd)); + } + 
lgmnal_return_stxd(nal_data, stxd); + return; +} + + +void +lgmnal_large_tx1_done(gm_port_t *gm_port, void *context, gm_status_t status) +{ + +} + +/* + * Begin a large transmit + */ +int +lgmnal_large_transmit1(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, + ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) +{ + + lgmnal_data_t *nal_data; + lgmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + lgmnal_msghdr_t *msghdr = NULL; + unsigned int local_nid; + int mlen = 0; /* the size of the init message data */ + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_large_transmit1 nal_cb [%p] private [%p], cookie [%p] hdr [%p], type [%d] global_nid [%u], pid [%d], + niov [%d], iov [%p], size [%d]\n", + nal_cb, private, cookie, hdr, type, global_nid, pid, niov, iov, size)); + + if (nal_cb) + nal_data = (lgmnal_data_t*)nal_cb->nal_data; + else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_cb.\n")); + return(LGMNAL_STATUS_FAIL); + } + + + /* + * TO DO large transmit uses stxd. Should it have control descriptor? + */ + stxd = lgmnal_get_stxd(nal_data, 1); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); + + stxd->type = LGMNAL_LARGE_MESSAGE_INIT; + stxd->cookie = cookie; + + /* + * Copy lgmnal_msg_hdr and portals header to the transmit buffer + * Then copy the iov in + */ + buffer = stxd->buffer; + msghdr = (lgmnal_msghdr_t*)buffer; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); + + msghdr->magic = LGMNAL_MAGIC; + msghdr->type = LGMNAL_LARGE_MESSAGE_INIT; + msghdr->sender_node_id = nal_data->gm_global_nid; + msghdr->stxd = stxd; + buffer += sizeof(lgmnal_msghdr_t); + mlen = sizeof(lgmnal_msghdr_t); + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); + + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + buffer += sizeof(ptl_hdr_t); + mlen += sizeof(ptl_hdr_t); + + /* + * Store the iovs in the stxd for we can get them later + * in large_transmit2 + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Copying iov [%p] to [%p]\n", iov, stxd->iov)); + gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec)); + stxd->niov = niov; + + /* + * Send the init message to the target + */ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending mlen [%d]\n", mlen)); + LGMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, mlen, GM_LOW_PRIORITY, local_nid, lgmnal_large_tx1_done, (void*)stxd); + LGMNAL_GM_UNLOCK(nal_data); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); + + return(PTL_OK); +} + + + + +EXPORT_SYMBOL(lgmnal_requeue_rxbuffer); +EXPORT_SYMBOL(lgmnal_badrx_message); +EXPORT_SYMBOL(lgmnal_large_tx1_done); +EXPORT_SYMBOL(lgmnal_large_transmit1); +EXPORT_SYMBOL(lgmnal_small_receive1); +EXPORT_SYMBOL(lgmnal_small_receive2); +EXPORT_SYMBOL(lgmnal_receive_thread); +EXPORT_SYMBOL(lgmnal_small_transmit); +EXPORT_SYMBOL(lgmnal_small_tx_done); diff --git a/lnet/klnds/lgmlnd/lgmnal_module.c b/lnet/klnds/lgmlnd/lgmnal_module.c new file mode 100644 index 0000000..51383fc --- /dev/null +++ b/lnet/klnds/lgmlnd/lgmnal_module.c @@ -0,0 +1,127 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. 
+ */ + + +#include "lgmnal.h" + + +ptl_handle_ni_t lgmnal_ni; + + +int +lgmnal_cmd(struct portal_ioctl_data *data, void *private) +{ + lgmnal_data_t *nal_data = NULL; + char *name = NULL; + int nid = -2; + int gnid; + gm_status_t gm_status; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cmd [d] private [%p]\n", data->ioc_nal_cmd, private)); + nal_data = (lgmnal_data_t*)private; + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("nal_data is [%p]\n", nal_data)); + switch(data->ioc_nal_cmd) { + /* + * just reuse already defined GET_NID. Should define LGMNAL version + */ + case(LGMNAL_IOC_GET_GNID): + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_cmd GETNID (Get GM Global Network Id\n")); + + PORTAL_ALLOC(name, data->ioc_plen1); + copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1); + + LGMNAL_GM_LOCK(nal_data); + nid = gm_host_name_to_node_id(nal_data->gm_port, name); + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%d]\n", nid)); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, nid, &gnid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_node_id_to_global_id failed\n", gm_status)); + return(-1); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node is is [%u][%x]\n", gnid, gnid)); + copy_to_user(data->ioc_pbuf2, &gnid, data->ioc_plen2); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_cmd UNKNOWN[%d]\n", data->ioc_nal_cmd)); + data->ioc_nid2 = -1; + } + + + return(0); +} + +int lgmnal_small_msg_size = 81920; +int lgmnal_debug_level = 1; + +int +init_module() +{ + int status; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("This is the lgmnal module initialisation routine\n")); + + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling lgmnal_init\n")); + status = PtlNIInit(lgmnal_init, 32, 4, 0, &lgmnal_ni); + if (status == PTL_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL initialised ok lgmnal_ni [%lx]\n", lgmnal_ni)); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL Failed to initialise\n")); + return(1); + + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kportal_nal_register\n")); + /* + * global_nal_data is set by lgmnal_init + */ + if (kportal_nal_register(LGMNAL, &lgmnal_cmd, global_nal_data) != 0) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("kportal_nal_register failed\n")); + return(1); + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling PORTAL_SYMBOL_REGISTER\n")); + PORTAL_SYMBOL_REGISTER(lgmnal_ni); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is the end of the lgmnal module initialisation routine")); + + + return(0); +} + + +void cleanup_module() +{ + int interface=0; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Cleaning up lgmnal module")); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Interface [%d] Calling shutdown\n", interface)); + kportal_nal_unregister(LGMNAL); + PORTAL_SYMBOL_UNREGISTER(lgmnal_ni); + lgmnal_fini(); + global_nal_data = NULL; + return; +} + + +EXPORT_SYMBOL(lgmnal_ni); +EXPORT_SYMBOL(lgmnal_debug_level); + +MODULE_PARM(lgmnal_small_msg_size, "i"); +MODULE_PARM(lgmnal_debug_level, "i"); + +MODULE_AUTHOR("Morgan Doyle. morgan.doyle@hp.com"); + +MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM2. 
[0= DEFAULT_LEN) { + PORTAL_ALLOC(varbuf, len+1+8); + if (!varbuf) { + printk("lgmnal_cb_printf Failed to malloc\n"); + printk("Truncated message is\n"); + printk(fixedbuf); + va_end(ap); + return; + } + sprintf(varbuf, "LGMNAL::"); + len = vsnprintf(varbuf+8, len+1, fmt, ap); + } else { + varbuf = fixedbuf; + } + va_end(ap); + printk(varbuf); + if (fixedbuf != varbuf) + PORTAL_FREE(varbuf, len+1+8); + return; +} + + +/* + * allocate a number of small tx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to send function. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the txd spinlock. + */ +int +lgmnal_alloc_stxd(lgmnal_data_t *nal_data) +{ + int ntx = 0, nstx = 0, i = 0; + lgmnal_stxd_t *txd = NULL; + void *txbuffer = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small tx\n")); + + LGMNAL_GM_LOCK(nal_data); + ntx = gm_num_send_tokens(nal_data->gm_port); + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of send tokens available is [%d]\n", ntx)); + + nstx = ntx/2; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] send tokens to small messages\n", nstx)); + + +#ifdef LGMNAL_USE_GM_HASH + nal_data->stxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nstx, 0); + if (!nal_data->srxd_hash) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n\n")); + return(LGMNAL_STATUS_NOMEM); + } +#else + nal_data->stxd_hash = NULL; +#endif + + /* + * A semaphore is initialised with the + * number of transmit tokens available. + * To get a stxd, acquire the token semaphore. 
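 *
 * (Illustration, not part of the original patch: the protocol this comment
 * describes, take a token, then take the spinlock to unlink a descriptor,
 * and the mirror image on return, is condensed below.  The real
 * lgmnal_get_stxd()/lgmnal_return_stxd() further down follow the same
 * pattern.)
 */

static lgmnal_stxd_t *example_get_stxd_blocking(lgmnal_data_t *nal_data)
{
        lgmnal_stxd_t *txd;

        LGMNAL_TXD_GETTOKEN(nal_data);          /* sleep until a descriptor is free */
        LGMNAL_TXD_LOCK(nal_data);              /* short critical section on the free list */
        txd = nal_data->stxd;
        if (txd)
                nal_data->stxd = txd->next;
        LGMNAL_TXD_UNLOCK(nal_data);
        return txd;
}

static void example_return_stxd(lgmnal_data_t *nal_data, lgmnal_stxd_t *txd)
{
        LGMNAL_TXD_LOCK(nal_data);
        txd->next = nal_data->stxd;
        nal_data->stxd = txd;
        LGMNAL_TXD_UNLOCK(nal_data);
        LGMNAL_TXD_RETURNTOKEN(nal_data);       /* wake any blocked getter */
}

/*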
+ * this decrements the available token count + * (if no tokens you block here, someone returning a + * stxd will release the semaphore and wake you) + * When token is obtained acquire the spinlock + * to manipulate the list + */ + LGMNAL_TXD_TOKEN_INIT(nal_data, nstx); + LGMNAL_TXD_LOCK_INIT(nal_data); + + for (i=0; i<=nstx; i++) { + PORTAL_ALLOC(txd, sizeof(lgmnal_stxd_t)); + if (!txd) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txd [%d]\n", i)); + return(LGMNAL_STATUS_NOMEM); + } +#if 0 + PORTAL_ALLOC(txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + if (!txbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); + return(LGMNAL_STATUS_FAIL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] txbuffer [%p], size [%d]\n", + nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", txbuffer, i)); + switch(gm_status) { + case(GM_FAILURE): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); + break; + case(GM_PERMISSION_DENIED): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); + break; + case(GM_INVALID_PARAMETER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown error\n")); + break; + } + return(LGMNAL_STATUS_FAIL); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_register_memory ok for buffer [%p], index [%d]\n", txbuffer, i)); + } +#else + LGMNAL_GM_LOCK(nal_data); + txbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (!txbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); + return(LGMNAL_STATUS_FAIL); + } +#endif + + txd->buffer = txbuffer; + txd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); + txd->gmsize = gm_min_size_for_length(txd->size); + txd->nal_data = (struct _lgmnal_data_t*)nal_data; + + if (lgmnal_hash_add(&nal_data->stxd_hash, (void*)txbuffer, (void*)txd)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry\n")); + return(LGMNAL_STATUS_FAIL); + } + + + txd->next = nal_data->stxd; + nal_data->stxd = txd; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); + } + + return(LGMNAL_STATUS_OK); +} + +/* Free the list of wired and gm_registered small tx buffers and the tx descriptors + that go along with them. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the txd spinlock. 
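 *
 * Illustration, not part of the original patch: the hash populated in the
 * allocation loop above is what lets a send-completion callback map a raw
 * buffer pointer back to its descriptor.  The transmit-side lookup presumably
 * mirrors lgmnal_rxbuffer_to_srxd() later in this file:
 */

static lgmnal_stxd_t *example_txbuffer_to_stxd(lgmnal_data_t *nal_data, void *txbuffer)
{
#ifdef LGMNAL_USE_GM_HASH
        return gm_hash_find(nal_data->stxd_hash, txbuffer);
#else
        return lgmnal_hash_find(nal_data->stxd_hash, txbuffer);
#endif
}

/*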
+ */ +void +lgmnal_free_stxd(lgmnal_data_t *nal_data) +{ + lgmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small tx\n")); + + while(txd) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); + _txd = txd; + txd = txd->next; +#if 0 + LGMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, _txd->buffer, _txd->size); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_txd->buffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); +#else + LGMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _txd->buffer); + LGMNAL_GM_UNLOCK(nal_data); +#endif + PORTAL_FREE(_txd, sizeof(lgmnal_stxd_t)); + } + return; +} + + +/* + * Get a txd from the list + * This get us a wired and gm_registered small tx buffer. + * This implicitly gets us a send token also. + */ +lgmnal_stxd_t * +lgmnal_get_stxd(lgmnal_data_t *nal_data, int block) +{ + + lgmnal_stxd_t *txd = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_stxd nal_data [%p] block[%d]\n", + nal_data, block)); + + if (block) { + LGMNAL_TXD_GETTOKEN(nal_data); + } else { + if (LGMNAL_TXD_TRYGETTOKEN(nal_data)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_stxd can't get token\n")); + return(NULL); + } + } + LGMNAL_TXD_LOCK(nal_data); + txd = nal_data->stxd; + if (txd) + nal_data->stxd = txd->next; + LGMNAL_TXD_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_stxd got [%p], head is [%p]\n", txd, nal_data->stxd)); + return(txd); +} + +/* + * Return a txd to the list + */ +void +lgmnal_return_stxd(lgmnal_data_t *nal_data, lgmnal_stxd_t *txd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_stxd nal_data [%p], txd[%p]\n", nal_data, txd)); + + LGMNAL_TXD_LOCK(nal_data); + txd->next = nal_data->stxd; + nal_data->stxd = txd; + LGMNAL_TXD_UNLOCK(nal_data); + LGMNAL_TXD_RETURNTOKEN(nal_data); + return; +} + + +/* + * allocate a number of small rx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to receive thread. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the rxd spinlock. 
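 *
 * Illustration, not part of the original patch: once the free list built by
 * lgmnal_alloc_srxd() below exists, lgmnal_init() drains it with non-blocking
 * gets and hands every wired buffer to GM, so the receive side starts out
 * fully posted.  In miniature:
 */

static void example_post_all_rx_buffers(lgmnal_data_t *nal_data)
{
        lgmnal_srxd_t *srxd;

        while ((srxd = lgmnal_get_srxd(nal_data, 0)) != NULL) {          /* 0 == do not block */
                LGMNAL_GM_LOCK(nal_data);
                gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
                                                   srxd->gmsize, GM_LOW_PRIORITY, 0);
                LGMNAL_GM_UNLOCK(nal_data);
        }
}

/*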
+ */ +int +lgmnal_alloc_srxd(lgmnal_data_t *nal_data) +{ + int nrx = 0, nsrx = 0, i = 0; + lgmnal_srxd_t *rxd = NULL; + void *rxbuffer = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small rx\n")); + + LGMNAL_GM_LOCK(nal_data); + nrx = gm_num_receive_tokens(nal_data->gm_port); + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of receive tokens available is [%d]\n", nrx)); + + nsrx = nrx/2; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] receive tokens to small messages\n", nsrx)); + + +#ifdef LGMNAL_USE_GM_HASH + LGMNAL_GM_LOCK(nal_data); + nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nsrx, 0); + LGMNAL_GM_UNLOCK(nal_data); + if (!nal_data->srxd_hash) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n")); + return(LGMNAL_STATUS_NOMEM); + } +#else + nal_data->srxd_hash = NULL; +#endif + + LGMNAL_RXD_TOKEN_INIT(nal_data, nsrx); + LGMNAL_RXD_LOCK_INIT(nal_data); + + for (i=0; i<=nsrx; i++) { + PORTAL_ALLOC(rxd, sizeof(lgmnal_srxd_t)); + if (!rxd) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxd [%d]\n", i)); + return(LGMNAL_STATUS_NOMEM); + } +#if 0 + PORTAL_ALLOC(rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + if (!rxbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); + return(LGMNAL_STATUS_FAIL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] rxbuffer [%p], size [%d]\n", + nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", rxbuffer, i)); + switch(gm_status) { + case(GM_FAILURE): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); + break; + case(GM_PERMISSION_DENIED): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); + break; + case(GM_INVALID_PARAMETER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown GM error[%d]\n", gm_status)); + break; + + } + return(LGMNAL_STATUS_FAIL); + } +#else + LGMNAL_GM_LOCK(nal_data); + rxbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (!rxbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); + return(LGMNAL_STATUS_FAIL); + } +#endif + + rxd->buffer = rxbuffer; + rxd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); + rxd->gmsize = gm_min_size_for_length(rxd->size); + + if (lgmnal_hash_add(&nal_data->srxd_hash, (void*)rxbuffer, (void*)rxd) != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry rxd[%p] for rxbuffer[%p]\n", rxd, rxbuffer)); + return(LGMNAL_STATUS_FAIL); + } + + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); + } + + return(LGMNAL_STATUS_OK); +} + + + +/* Free the list of wired and gm_registered small rx buffers and the rx descriptors + * that go along with them. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the rxd spinlock. 
+ */ +void +lgmnal_free_srxd(lgmnal_data_t *nal_data) +{ + lgmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small rx\n")); + + while(rxd) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); + _rxd = rxd; + rxd = rxd->next; + +#if 0 + LGMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, _rxd->buffer, _rxd->size); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_rxd->buffer, LGMNAL_SMALL_RXBUFFER_SIZE); +#else + LGMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _rxd->buffer); + LGMNAL_GM_UNLOCK(nal_data); +#endif + PORTAL_FREE(_rxd, sizeof(lgmnal_srxd_t)); + } + return; +} + + +/* + * Get a rxd from the free list + * This get us a wired and gm_registered small rx buffer. + * This implicitly gets us a receive token also. + */ +lgmnal_srxd_t * +lgmnal_get_srxd(lgmnal_data_t *nal_data, int block) +{ + + lgmnal_srxd_t *rxd = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_srxd nal_data [%p] block [%d]\n", nal_data, block)); + + if (block) { + LGMNAL_RXD_GETTOKEN(nal_data); + } else { + if (LGMNAL_RXD_TRYGETTOKEN(nal_data)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_srxd Can't get token\n")); + return(NULL); + } + } + LGMNAL_RXD_LOCK(nal_data); + rxd = nal_data->srxd; + if (rxd) + nal_data->srxd = rxd->next; + LGMNAL_RXD_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_srxd got [%p], head is [%p]\n", rxd, nal_data->srxd)); + return(rxd); +} + +/* + * Return an rxd to the list + */ +void +lgmnal_return_srxd(lgmnal_data_t *nal_data, lgmnal_srxd_t *rxd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_srxd nal_data [%p], rxd[%p]\n", nal_data, rxd)); + + LGMNAL_RXD_LOCK(nal_data); + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + LGMNAL_RXD_UNLOCK(nal_data); + LGMNAL_RXD_RETURNTOKEN(nal_data); + return; +} + +/* + * Given a pointer to a srxd find + * the relevant descriptor for it + * This is done by searching a hash + * list that is created when the srxd's + * are created + */ +lgmnal_srxd_t * +lgmnal_rxbuffer_to_srxd(lgmnal_data_t *nal_data, void *rxbuffer) +{ + lgmnal_srxd_t *srxd = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_rxbuffer_to_srxd nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer)); +#ifdef LGMNAL_USE_GM_HASH + srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer); +#else + srxd = lgmnal_hash_find(nal_data->srxd_hash, rxbuffer); +#endif + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("srxd is [%p]\n", srxd)); + return(srxd); +} + + +void +lgmnal_stop_rxthread(lgmnal_data_t *nal_data) +{ + int delay = 15; + + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Attempting to stop rxthread nal_data [%p]\n", nal_data)); + + if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("thread flag not correctly set\n")); + } + + nal_data->rxthread_flag = LGMNAL_THREAD_STOP; + LGMNAL_GM_LOCK(nal_data); + gm_set_alarm(nal_data->gm_port, &nal_data->rxthread_alarm, 10, NULL, NULL); + LGMNAL_GM_UNLOCK(nal_data); + + while(nal_data->rxthread_flag == LGMNAL_THREAD_STOP && delay--) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_stop_rxthread sleeping\n")); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1024); + } + + if (nal_data->rxthread_flag == LGMNAL_THREAD_STOP) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("I DON'T KNOW HOW TO WAKE THE THREAD\n")); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RX THREAD SEEMS TO HAVE STOPPED\n")); + } + +} + + + +char * +lgmnal_gm_error(gm_status_t status) +{ + switch(status) { + 
case(GM_SUCCESS): + return("SUCCESS"); + case(GM_FAILURE): + return("FAILURE"); + case(GM_INPUT_BUFFER_TOO_SMALL): + return("INPUT_BUFFER_TOO_SMALL"); + case(GM_OUTPUT_BUFFER_TOO_SMALL): + return("OUTPUT_BUFFER_TOO_SMALL"); + case(GM_TRY_AGAIN ): + return("TRY_AGAIN"); + case(GM_BUSY): + return("BUSY"); + case(GM_MEMORY_FAULT): + return("MEMORY_FAULT"); + case(GM_INTERRUPTED): + return("INTERRUPTED"); + case(GM_INVALID_PARAMETER): + return("INVALID_PARAMETER"); + case(GM_OUT_OF_MEMORY): + return("OUT_OF_MEMORY"); + case(GM_INVALID_COMMAND): + return("INVALID_COMMAND"); + case(GM_PERMISSION_DENIED): + return("PERMISSION_DENIED"); + case(GM_INTERNAL_ERROR): + return("INTERNAL_ERROR"); + case(GM_UNATTACHED): + return("UNATTACHED"); + case(GM_UNSUPPORTED_DEVICE): + return("UNSUPPORTED_DEVICE"); + case(GM_SEND_TIMED_OUT): + return("GM_SEND_TIMEDOUT"); + case(GM_SEND_REJECTED): + return("GM_SEND_REJECTED"); + case(GM_SEND_TARGET_PORT_CLOSED): + return("GM_SEND_TARGET_PORT_CLOSED"); + case(GM_SEND_TARGET_NODE_UNREACHABLE): + return("GM_SEND_TARGET_NODE_UNREACHABLE"); + case(GM_SEND_DROPPED): + return("GM_SEND_DROPPED"); + case(GM_SEND_PORT_CLOSED): + return("GM_SEND_PORT_CLOSED"); + case(GM_NODE_ID_NOT_YET_SET): + return("GM_NODE_ID_NOT_YET_SET"); + case(GM_STILL_SHUTTING_DOWN): + return("GM_STILL_SHUTTING_DOWN"); + case(GM_CLONE_BUSY): + return("GM_CLONE_BUSY"); + case(GM_NO_SUCH_DEVICE): + return("GM_NO_SUCH_DEVICE"); + case(GM_ABORTED): + return("GM_ABORTED"); + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); + case(GM_UNTRANSLATED_SYSTEM_ERROR): + return("GM_UNTRANSLATED_SYSTEM_ERROR"); + case(GM_ACCESS_DENIED): + return("GM_ACCESS_DENIED"); + + +/* + * These ones are in the docs but aren't in the header file + case(GM_DEV_NOT_FOUND): + return("GM_DEV_NOT_FOUND"); + case(GM_INVALID_PORT_NUMBER): + return("GM_INVALID_PORT_NUMBER"); + case(GM_UC_ERROR): + return("GM_US_ERROR"); + case(GM_PAGE_TABLE_FULL): + return("GM_PAGE_TABLE_FULL"); + case(GM_MINOR_OVERFLOW): + return("GM_MINOR_OVERFLOW"); + case(GM_SEND_ORPHANED): + return("GM_SEND_ORPHANED"); + case(GM_HARDWARE_FAULT): + return("GM_HARDWARE_FAULT"); + case(GM_DATA_CORRUPTED): + return("GM_DATA_CORRUPTED"); + case(GM_TIMED_OUT): + return("GM_TIMED_OUT"); + case(GM_USER_ERROR): + return("GM_USER_ERROR"); + case(GM_NO_MATCH): + return("GM_NOMATCH"); + case(GM_NOT_SUPPORTED_IN_KERNEL): + return("GM_NOT_SUPPORTED_IN_KERNEL"); + case(GM_NOT_SUPPORTED_ON_ARCH): + return("GM_NOT_SUPPORTED_ON_ARCH"); + case(GM_PTE_REF_CNT_OVERFLOW): + return("GM_PTR_REF_CNT_OVERFLOW"); + case(GM_NO_DRIVER_SUPPORT): + return("GM_NO_DRIVER_SUPPORT"); + case(GM_FIRMWARE_NOT_RUNNING): + return("GM_FIRMWARE_NOT_RUNNING"); + + * These ones are in the docs but aren't in the header file + */ + default: + return("UNKNOWN GM ERROR CODE"); + } +} + + +char * +lgmnal_rxevent(gm_recv_event_t *ev) +{ + short event; + char msg[24]; + event = GM_RECV_EVENT_TYPE(ev); + switch(event) { + case(GM_NO_RECV_EVENT): + return("GM_NO_RECV_EVENT"); + case(GM_SENDS_FAILED_EVENT): + return("GM_SEND_FAILED_EVENT"); + case(GM_ALARM_EVENT): + return("GM_ALARM_EVENT"); + case(GM_SENT_EVENT): + return("GM_SENT_EVENT"); + case(_GM_SLEEP_EVENT): + return("_GM_SLEEP_EVENT"); + case(GM_RAW_RECV_EVENT): + return("GM_RAW_RECV_EVENT"); + case(GM_BAD_SEND_DETECTED_EVENT): + return("GM_BAD_SEND_DETECTED_EVENT"); + case(GM_SEND_TOKEN_VIOLATION_EVENT): + return("GM_SEND_TOKEN_VIOLATION_EVENT"); + case(GM_RECV_TOKEN_VIOLATION_EVENT): + 
return("GM_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_BAD_RECV_TOKEN_EVENT): + return("GM_BAD_RECV_TOKEN_EVENT"); + case(GM_ALARM_VIOLATION_EVENT): + return("GM_ALARM_VIOLATION_EVENT"); + case(GM_RECV_EVENT): + return("GM_RECV_EVENT"); + case(GM_HIGH_RECV_EVENT): + return("GM_HIGH_RECV_EVENT"); + case(GM_PEER_RECV_EVENT): + return("GM_PEER_RECV_EVENT"); + case(GM_HIGH_PEER_RECV_EVENT): + return("GM_HIGH_PEER_RECV_EVENT"); + case(GM_FAST_RECV_EVENT): + return("GM_FAST_RECV_EVENT"); + case(GM_FAST_HIGH_RECV_EVENT): + return("GM_FAST_HIGH_RECV_EVENT"); + case(GM_FAST_PEER_RECV_EVENT): + return("GM_FAST_PEER_RECV_EVENT"); + case(GM_FAST_HIGH_PEER_RECV_EVENT): + return("GM_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_REJECTED_SEND_EVENT): + return("GM_REJECTED_SEND_EVENT"); + case(GM_ORPHANED_SEND_EVENT): + return("GM_ORPHANED_SEND_EVENT"); + case(GM_BAD_RESEND_DETECTED_EVENT): + return("GM_BAD_RESEND_DETETED_EVENT"); + case(GM_DROPPED_SEND_EVENT): + return("GM_DROPPED_SEND_EVENT"); + case(GM_BAD_SEND_VMA_EVENT): + return("GM_BAD_SEND_VMA_EVENT"); + case(GM_BAD_RECV_VMA_EVENT): + return("GM_BAD_RECV_VMA_EVENT"); + case(_GM_FLUSHED_ALARM_EVENT): + return("GM_FLUSHED_ALARM_EVENT"); + case(GM_SENT_TOKENS_EVENT): + return("GM_SENT_TOKENS_EVENTS"); + case(GM_IGNORE_RECV_EVENT): + return("GM_IGNORE_RECV_EVENT"); + case(GM_ETHERNET_RECV_EVENT): + return("GM_ETHERNET_RECV_EVENT"); + case(GM_NEW_NO_RECV_EVENT): + return("GM_NEW_NO_RECV_EVENT"); + case(GM_NEW_SENDS_FAILED_EVENT): + return("GM_NEW_SENDS_FAILED_EVENT"); + case(GM_NEW_ALARM_EVENT): + return("GM_NEW_ALARM_EVENT"); + case(GM_NEW_SENT_EVENT): + return("GM_NEW_SENT_EVENT"); + case(_GM_NEW_SLEEP_EVENT): + return("GM_NEW_SLEEP_EVENT"); + case(GM_NEW_RAW_RECV_EVENT): + return("GM_NEW_RAW_RECV_EVENT"); + case(GM_NEW_BAD_SEND_DETECTED_EVENT): + return("GM_NEW_BAD_SEND_DETECTED_EVENT"); + case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): + return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): + return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_BAD_RECV_TOKEN_EVENT): + return("GM_NEW_BAD_RECV_TOKEN_EVENT"); + case(GM_NEW_ALARM_VIOLATION_EVENT): + return("GM_NEW_ALARM_VIOLATION_EVENT"); + case(GM_NEW_RECV_EVENT): + return("GM_NEW_RECV_EVENT"); + case(GM_NEW_HIGH_RECV_EVENT): + return("GM_NEW_HIGH_RECV_EVENT"); + case(GM_NEW_PEER_RECV_EVENT): + return("GM_NEW_PEER_RECV_EVENT"); + case(GM_NEW_HIGH_PEER_RECV_EVENT): + return("GM_NEW_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_FAST_RECV_EVENT): + return("GM_NEW_FAST_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_RECV_EVENT): + return("GM_NEW_FAST_HIGH_RECV_EVENT"); + case(GM_NEW_FAST_PEER_RECV_EVENT): + return("GM_NEW_FAST_PEER_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): + return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_REJECTED_SEND_EVENT): + return("GM_NEW_REJECTED_SEND_EVENT"); + case(GM_NEW_ORPHANED_SEND_EVENT): + return("GM_NEW_ORPHANED_SEND_EVENT"); + case(_GM_NEW_PUT_NOTIFICATION_EVENT): + return("_GM_NEW_PUT_NOTIFICATION_EVENT"); + case(GM_NEW_FREE_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_SEND_TOKEN_EVENT"); + case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); + case(GM_NEW_BAD_RESEND_DETECTED_EVENT): + return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); + case(GM_NEW_DROPPED_SEND_EVENT): + return("GM_NEW_DROPPED_SEND_EVENT"); + case(GM_NEW_BAD_SEND_VMA_EVENT): + return("GM_NEW_BAD_SEND_VMA_EVENT"); + case(GM_NEW_BAD_RECV_VMA_EVENT): + return("GM_NEW_BAD_RECV_VMA_EVENT"); + case(_GM_NEW_FLUSHED_ALARM_EVENT): + 
return("GM_NEW_FLUSHED_ALARM_EVENT"); + case(GM_NEW_SENT_TOKENS_EVENT): + return("GM_NEW_SENT_TOKENS_EVENT"); + case(GM_NEW_IGNORE_RECV_EVENT): + return("GM_NEW_IGNORE_RECV_EVENT"); + case(GM_NEW_ETHERNET_RECV_EVENT): + return("GM_NEW_ETHERNET_RECV_EVENT"); + default: + snprintf(msg, 24, "Unknown Recv event [%d]", event); + return(msg); +#if 0 + case(/* _GM_PUT_NOTIFICATION_EVENT */ + case(/* GM_FREE_SEND_TOKEN_EVENT */ + case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */ +#endif + } +} + + +void +lgmnal_yield(int delay) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); +} + +int +lgmnal_is_small_message(lgmnal_data_t *nal_data, int niov, struct iovec *iov, int len) +{ + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_is_small_message len is [%d]\n", len)); + if (len < LGMNAL_SMALL_MSG_SIZE(nal_data)) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Yep, small message]\n")); + return(1); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No, not small message]\n")); + return(0); + } +} + +void * +lgmnal_hash_find(lgmnal_hash_t *hash, void *key) +{ + void *data = NULL; + int count = 0; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_find hash [%p] key [%p]\n", hash, key)); + + while (hash) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find Stepping [%d]\n", count++)); + if (hash->key == key) { + data = hash->data; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find hash got data[%p]\n", data)); + return(data); + } else + hash = hash->next; + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find data not found\n")); + return(NULL); +} + +/* + * TO DO hash. figure out why getting bad stuff from gm_hash and thne use it. + */ + +int +lgmnal_hash_add(lgmnal_hash_t **hash, void *key, void *data) +{ + +#ifdef LGMNAL_USE_GM_HASH + return(gm_hash_insert(*hash, (void*)key, (void*)data); +#else + lgmnal_hash_t *new = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_add hash [%p]\n", *hash)); + PORTAL_ALLOC(new, sizeof(lgmnal_hash_t)); + memset(new, 0, sizeof(lgmnal_hash_t)); + if (!new) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_hash_add :: can't get memory\n")); + return(-1); + } + new->data = data; + new->key = key; + new->next = *hash; + *hash = new; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_add hash head [%p]\n", *hash)); + return(0); +#endif +} + +void +lgmnal_hash_free(lgmnal_hash_t **hash) +{ + + lgmnal_hash_t *_hash = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_free hash [p%]\n", *hash)); + + while (*hash) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_free freeing hash [p%]\n", _hash)); + _hash = *hash; + *hash = _hash->next; + PORTAL_FREE(_hash, sizeof(lgmnal_hash_t)); + } + return; +} + + +EXPORT_SYMBOL(lgmnal_yield); +EXPORT_SYMBOL(lgmnal_print); +EXPORT_SYMBOL(lgmnal_alloc_srxd); +EXPORT_SYMBOL(lgmnal_get_srxd); +EXPORT_SYMBOL(lgmnal_return_srxd); +EXPORT_SYMBOL(lgmnal_free_srxd); +EXPORT_SYMBOL(lgmnal_alloc_stxd); +EXPORT_SYMBOL(lgmnal_get_stxd); +EXPORT_SYMBOL(lgmnal_return_stxd); +EXPORT_SYMBOL(lgmnal_free_stxd); +EXPORT_SYMBOL(lgmnal_rxbuffer_to_srxd); +EXPORT_SYMBOL(lgmnal_rxevent); +EXPORT_SYMBOL(lgmnal_gm_error); +EXPORT_SYMBOL(lgmnal_stop_rxthread); diff --git a/lustre/portals/knals/lgmnal/Makefile.am b/lustre/portals/knals/lgmnal/Makefile.am new file mode 100644 index 0000000..6794494 --- /dev/null +++ b/lustre/portals/knals/lgmnal/Makefile.am @@ -0,0 +1,13 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +include ../../Rules.linux + +MODULE = lgmnal +modulenet_DATA = lgmnal.o +EXTRA_PROGRAMS = lgmnal + +DEFS = +lgmnal_SOURCES = lgmnal.h lgmnal_api.c lgmnal_cb.c lgmnal_comm.c lgmnal_utils.c lgmnal_module.c diff --git a/lustre/portals/knals/lgmnal/Makefile.mk b/lustre/portals/knals/lgmnal/Makefile.mk new file mode 100644 index 0000000..c8ca67f --- /dev/null +++ b/lustre/portals/knals/lgmnal/Makefile.mk @@ -0,0 +1,10 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. +# See the file COPYING in this distribution + +include ../../Kernelenv + +obj-y += lgmnal.o +lgmnal-objs := lgmnal_api.o lgmnal_cb.o lgmnal_utils.o lgmnal_comm.o lgmnal_module.o + diff --git a/lustre/portals/knals/lgmnal/lgmnal.h b/lustre/portals/knals/lgmnal/lgmnal.h new file mode 100644 index 0000000..1147078 --- /dev/null +++ b/lustre/portals/knals/lgmnal/lgmnal.h @@ -0,0 +1,451 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + +/* + * Portals GM kernel NAL header file + * This file makes all declaration and prototypes + * for the API side and CB side of the NAL + */ +#ifndef __INCLUDE_LGMNAL_H__ +#define __INCLUDE_LGMNAL_H__ + +#include "linux/config.h" +#include "linux/module.h" +#include "linux/tty.h" +#include "linux/kernel.h" +#include "linux/mm.h" +#include "linux/string.h" +#include "linux/stat.h" +#include "linux/errno.h" +#include "linux/locks.h" +#include "linux/unistd.h" +#include "linux/init.h" +#include "linux/sem.h" +#include "linux/vmalloc.h" +#ifdef MODVERSIONS +#include +#endif + + +#include "portals/nal.h" +#include "portals/api.h" +#include "portals/errno.h" +#include "linux/kp30.h" +#include "portals/p30.h" + +#include "portals/lib-nal.h" +#include "portals/lib-p30.h" + +#define GM_STRONG_TYPES 1 +#include "gm.h" +#include "gm_internal.h" + + +/* + * Defines for the API NAL + */ + + + +/* + * Small message size is configurable + * insmod can set small_msg_size + * which is used to populate nal_data.small_msg_size + */ +#define LGMNAL_SMALL_MESSAGE 1078 +#define LGMNAL_LARGE_MESSAGE_INIT 1079 +#define LGMNAL_LARGE_MESSAGE_ACK 1080 +#define LGMNAL_LARGE_MESSAGE_FINI 1081 + +extern int lgmnal_small_msg_size; +#define LGMNAL_SMALL_MSG_SIZE(a) a->small_msg_size +#define LGMNAL_IS_SMALL_MESSAGE(n,a,b,c) lgmnal_is_small_message(n, a, b, c) +#define LGMNAL_MAGIC 0x1234abcd + +typedef struct _lgmnal_hash { + void *key; + void *data; + struct _lgmnal_hash *next; + } lgmnal_hash_t; + +/* + * Small Transmit Descriptor + * A structre to keep track of a small transmit operation + * This structure has a one-to-one relationship with a small + * transmit buffer (both create by lgmnal_stxd_alloc). + * stxd has pointer to txbuffer and the hash table in nal_data + * allows us to go the other way. 
+ */ +typedef struct _lgmnal_stxd_t { + void *buffer; /* Address of small wired buffer this decriptor uses */ + int size; /* size (in bytes) of the tx buffer this descripto uses */ + gm_size_t gmsize; /* gmsize of the tx buffer this descripto uses */ + int type; /* large or small message */ + struct _lgmnal_data_t *nal_data; + lib_msg_t *cookie; /* the cookie the portals library gave us */ + int niov; + struct iovec iov[PTL_MD_MAX_IOV]; + struct _lgmnal_stxd_t *next; +} lgmnal_stxd_t; + +/* + * as for lgmnal_stxd_t + */ +typedef struct _lgmnal_srxd_t { + void *buffer; + int size; + gm_size_t gmsize; + int type; + struct _lgmnal_srxd_t *next; +} lgmnal_srxd_t; + +/* + * Header which lmgnal puts at the start of each message + */ +typedef struct _lgmnal_msghdr { + int magic; + int type; + unsigned int sender_node_id; + lgmnal_stxd_t *stxd; + } lgmnal_msghdr_t; +#define LGMNAL_MSGHDR_SIZE sizeof(lgmnal_msghdr_t) + +/* + * There's one of these for each interface that is initialised + * There's a maximum of LGMNAL_NUM_IF lgmnal_data_t + */ + +typedef struct _lgmnal_data_t { + int refcnt; +#ifdef LGMNAL_API_LOCK_SPIN + spinlock_t api_lock; /* lock provided for api->lock function */ +#else + struct semaphore api_lock; +#endif + spinlock_t cb_lock; /* lock provided for cb_cli function */ + char _cb_file[128]; + char _cb_function[128]; + int _cb_line; + spinlock_t stxd_lock; /* lock to add or remove stxd to/from free list */ + struct semaphore stxd_token; /* Don't try to access the list until get a token */ + lgmnal_stxd_t *stxd; /* list of free stxd's */ +#ifdef LGMNAL_USE_GM_HASH + struct gm_hash *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ +#else + lgmnal_hash_t *stxd_hash; /* hash to translate txbuffer to stxd. Created in stxd_alloc */ +#endif + spinlock_t srxd_lock; + struct semaphore srxd_token; + lgmnal_srxd_t *srxd; +#ifdef LGMNAL_USE_GM_HASH + struct gm_hash *srxd_hash; +#else + lgmnal_hash_t *srxd_hash; +#endif + nal_t *nal; /* our API NAL */ + nal_cb_t *nal_cb; /* our CB nal */ + struct gm_port *gm_port; /* the gm port structure we open in lgmnal_init */ + unsigned int gm_local_nid; /* our gm local node id */ + unsigned int gm_global_nid; /* our gm global node id */ + spinlock_t gm_lock; /* GM is not threadsage */ + long rxthread_pid; /* thread id of our receiver thread */ + int rxthread_flag; /* stop the thread flag */ + gm_alarm_t rxthread_alarm; /* used to wake sleeping rx thread */ + int small_msg_size; + int small_msg_gmsize; + char _file[128]; + char _function[128]; + int _line; +} lgmnal_data_t; + +/* + * For nal_data->rxthread_flag + */ +#define LGMNAL_THREAD_START 444 +#define LGMNAL_THREAD_STARTED 333 +#define LGMNAL_THREAD_CONTINUE 777 +#define LGMNAL_THREAD_STOP 666 +#define LGMNAL_THREAD_STOPPED 555 + +#define LGMNAL_NUM_IF 1 + +#if 0 +/* + * A global structre to maintain 1 nal_data structure for each + * myrinet card that the user initialises (only tested for 1) + * To add or remove any nal_data structures from the ifs arrary the + * init_lock must be acquired. 
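 *
 * Illustration, not part of the original patch: the LGMNAL_THREAD_* values
 * above form a small handshake.  lgmnal_init() sets START and waits for
 * STARTED before switching the flag to CONTINUE; lgmnal_stop_rxthread() sets
 * STOP and waits for the thread to answer.  A sketch of the thread side of
 * that handshake (an assumption for illustration, not the patch's actual
 * lgmnal_receive_thread()):
 */

static int example_receive_thread(void *arg)
{
        lgmnal_data_t *nal_data = (lgmnal_data_t *)arg;

        nal_data->rxthread_flag = LGMNAL_THREAD_STARTED;        /* answer lgmnal_init() */
        while (nal_data->rxthread_flag != LGMNAL_THREAD_STOP) {
                /* block in gm_blocking_receive_no_spin() and dispatch events here */
        }
        nal_data->rxthread_flag = LGMNAL_THREAD_STOPPED;        /* answer lgmnal_stop_rxthread() */
        return 0;
}

/*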
This is the only time this lock is acquired + */ +typedef struct _lgmnal_global_t { + int debug_level; + struct semaphore init_lock; + lgmnal_data_t *ifs[LGMNAL_NUM_IF]; +} lgmnal_global_t; + +extern lgmnal_data_t global_nal_data; +#define LGMNAL_DEBUG_LEVEL lgmnal_global.debug_level +#else +extern lgmnal_data_t *global_nal_data; +extern int lgmnal_debug_level; +#define LGMNAL_DEBUG_LEVEL lgmnal_debug_level +#endif + +/* + * The gm_port to use for lgmnal + */ +#define LGMNAL_GM_PORT 4 + +/* + * for ioctl get pid + */ +#define LGMNAL_IOC_GET_GNID 1 + +/* + * LGMNAL_DEBUG_LEVEL set by module load 0= level) lgmnal_print args +#else +#define LGMNAL_PRINT(level, args) +#endif + +#define LGMNAL_DEBUG_ERR 1 /* only report errors */ +#define LGMNAL_DEBUG_TRACE 2 /* on entering function */ +#define LGMNAL_DEBUG_V 3 /* debug */ +#define LGMNAL_DEBUG_VV 4 /* more debug */ + +/* + * Return codes + */ +#define LGMNAL_STATUS_OK 0 +#define LGMNAL_STATUS_FAIL 1 +#define LGMNAL_STATUS_NOMEM 2 + + +/* + * FUNCTION PROTOTYPES + */ + +/* + * Locking macros + */ + +/* + * To access the global structure + * to add or remove interface (lgmnal_init) or shutdown only + */ +#define LGMNAL_GLOBAL_LOCK_INIT sema_init(&(lgmnal_global.init_lock), 1) +#define LGMNAL_GLOBAL_LOCK do { \ + LGMNAL_PRINT(1, ("Acquiring global mutex\n")); \ + down(&(lgmnal_global.init_lock)); \ + LGMNAL_PRINT(1, ("Got global lock\n")); \ + } while (0) +#define LGMNAL_GLOBAL_UNLOCK do { \ + LGMNAL_PRINT(1, ("Releasing global mutex\n")); \ + up(&(lgmnal_global.init_lock)); \ + LGMNAL_PRINT(1, ("Release global mutex\n")); \ + } while (0) + +/* + * For the API lock function + */ +#ifdef LGMNAL_API_LOCK_SPIN +#define LGMNAL_API_LOCK_INIT(a) spin_lock_init(&a->api_lock) +#define LGMNAL_API_LOCK(a) spin_lock(&a->api_lock) +#define LGMNAL_API_UNLOCK(a) spin_unlock(&a->api_lock) +#else +#define LGMNAL_API_LOCK_INIT(a) sema_init(&a->api_lock, 1) +#define LGMNAL_API_LOCK(a) down(&a->api_lock) +#define LGMNAL_API_UNLOCK(a) up(&a->api_lock) +#endif + +/* + * For the Small tx and rx descriptor lists + */ +#define LGMNAL_TXD_LOCK_INIT(a) spin_lock_init(&a->stxd_lock); +#define LGMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock); +#define LGMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock); +#define LGMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n); +#define LGMNAL_TXD_GETTOKEN(a) down(&a->stxd_token); +#define LGMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token) +#define LGMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token); + + +#define LGMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock); +#define LGMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock); +#define LGMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock); +#define LGMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n); +#define LGMNAL_RXD_GETTOKEN(a) down(&a->srxd_token); +#define LGMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token) +#define LGMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token); + +#define LGMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock); +#define LGMNAL_GM_LOCK(a) do { \ + while (!spin_trylock(&a->gm_lock)) { \ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("waiting %s:%s:%d holder %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__, nal_data->_function, nal_data->_file, nal_data->_line)); \ + lgmnal_yield(128); \ + } \ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Locked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ + sprintf(nal_data->_function, "%s", __FUNCTION__); \ + sprintf(nal_data->_file, "%s", __FILE__); \ + nal_data->_line = __LINE__; \ + } while (0) +#define LGMNAL_GM_UNLOCK(a) do { \ + spin_unlock(&a->gm_lock); \ + 
memset(nal_data->_function, 0, 128); \ + memset(nal_data->_file, 0, 128); \ + nal_data->_line = 0; \ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("GM Unlocked %s:%s:%d\n", __FUNCTION__, __FILE__, __LINE__)); \ + } while(0); + +#define LGMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock); + + +/* + * API NAL + */ +int lgmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t); + +int lgmnal_api_shutdown(nal_t *, int); + +int lgmnal_api_validate(nal_t *, void *, size_t); + +void lgmnal_api_yield(nal_t *); + +void lgmnal_api_lock(nal_t *, unsigned long *); + +void lgmnal_api_unlock(nal_t *, unsigned long *); + + +#define LGMNAL_INIT_NAL(a) do { \ + a->forward = lgmnal_api_forward; \ + a->shutdown = lgmnal_api_shutdown; \ + a->validate = NULL; \ + a->yield = lgmnal_api_yield; \ + a->lock = lgmnal_api_lock; \ + a->unlock = lgmnal_api_unlock; \ + a->timeout = NULL; \ + a->refct = 1; \ + a->nal_data = NULL; \ + } while (0) + + +/* + * CB NAL + */ + +int lgmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t); + +int lgmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, + int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t); + +int lgmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *, + unsigned int, struct iovec *, size_t, size_t); + +int lgmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *, + unsigned int, ptl_kiov_t *, size_t, size_t); + +int lgmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t); + +int lgmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t); + +int lgmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *); + +void *lgmnal_cb_malloc(nal_cb_t *, size_t); + +void lgmnal_cb_free(nal_cb_t *, void *, size_t); + +void lgmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **); + +int lgmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **); + +void lgmnal_cb_printf(nal_cb_t *, const char *fmt, ...); + +void lgmnal_cb_cli(nal_cb_t *, unsigned long *); + +void lgmnal_cb_sti(nal_cb_t *, unsigned long *); + +int lgmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *); + +nal_t *lgmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid); + +void lgmnal_fini(void); + + + +#define LGMNAL_INIT_NAL_CB(a) do { \ + a->cb_send = lgmnal_cb_send; \ + a->cb_send_pages = lgmnal_cb_send_pages; \ + a->cb_recv = lgmnal_cb_recv; \ + a->cb_recv_pages = lgmnal_cb_recv_pages; \ + a->cb_read = lgmnal_cb_read; \ + a->cb_write = lgmnal_cb_write; \ + a->cb_callback = lgmnal_cb_callback; \ + a->cb_malloc = lgmnal_cb_malloc; \ + a->cb_free = lgmnal_cb_free; \ + a->cb_map = NULL; \ + a->cb_unmap = NULL; \ + a->cb_printf = lgmnal_cb_printf; \ + a->cb_cli = lgmnal_cb_cli; \ + a->cb_sti = lgmnal_cb_sti; \ + a->cb_dist = lgmnal_cb_dist; \ + a->nal_data = NULL; \ + } while (0) + +/* + * lgmnal utilities + */ + +void lgmnal_print(const char *, ...); + +/* + * Small Transmit and Receive Descriptor Functions + */ +int lgmnal_alloc_stxd(lgmnal_data_t *); +void lgmnal_free_stxd(lgmnal_data_t *); +lgmnal_stxd_t* lgmnal_get_stxd(lgmnal_data_t *, int); +void lgmnal_return_stxd(lgmnal_data_t *, lgmnal_stxd_t *); + +int lgmnal_alloc_srxd(lgmnal_data_t *); +void lgmnal_free_srxd(lgmnal_data_t *); +lgmnal_srxd_t* lgmnal_get_srxd(lgmnal_data_t *, int); +void lgmnal_return_srxd(lgmnal_data_t *, lgmnal_srxd_t *); + +/* + * general utility functions + */ +lgmnal_srxd_t *lgmnal_rxbuffer_to_srxd(lgmnal_data_t *, void*); +lgmnal_stxd_t *lgmnal_txbuffer_to_stxd(lgmnal_data_t *, void*); +void 
lgmnal_stop_rxthread(lgmnal_data_t *); +void lgmnal_small_tx_done(gm_port_t *, void *, gm_status_t); +char *lgmnal_gm_error(gm_status_t); +char *lgmnal_rxevent(gm_recv_event_t*); +int lgmnal_is_small_message(lgmnal_data_t*, int, struct iovec*, int); + +void *lgmnal_hash_find(lgmnal_hash_t *, void*); +int lgmnal_hash_add(lgmnal_hash_t**, void*, void*); +void lgmnal_hash_free(lgmnal_hash_t**); + +/* + * Communication functions + */ +int lgmnal_receive_thread(void *); +int +lgmnal_small_transmit(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *, int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec*, int); + +int +lgmnal_small_receive2(nal_cb_t *, void *, lib_msg_t *, unsigned int, struct iovec *, size_t, size_t); + +void lgmnal_yield(int); + +#endif /*__INCLUDE_LGMNAL_H__*/ diff --git a/lustre/portals/knals/lgmnal/lgmnal_api.c b/lustre/portals/knals/lgmnal/lgmnal_api.c new file mode 100644 index 0000000..8e774bf --- /dev/null +++ b/lustre/portals/knals/lgmnal/lgmnal_api.c @@ -0,0 +1,518 @@ + +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + + + +/* + * Implements the API NAL functions + */ + +#include "lgmnal.h" + +lgmnal_data_t *global_nal_data = NULL; +/* + * lgmnal_api_forward + * This function takes a pack block of arguments from the NAL API + * module and passes them to the NAL CB module. The CB module unpacks + * the args and calls the appropriate function indicated by index. + * Typically this function is used to pass args between kernel and use + * space. 
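 *
 * (Illustration, not part of the original patch: a NAL whose API side lived in
 * user space would have to move the packed blocks across the boundary before
 * and after dispatching, roughly as below; error handling is omitted.)
 */

static int example_crossing_forward(nal_cb_t *nal_cb, int index,
                                    void *uargs, size_t arg_len,
                                    void *uret, size_t ret_len)
{
        void *args = NULL, *ret = NULL;

        PORTAL_ALLOC(args, arg_len);
        PORTAL_ALLOC(ret, ret_len);
        copy_from_user(args, uargs, arg_len);        /* pull the packed args into the kernel */
        lib_dispatch(nal_cb, NULL, index, args, ret);
        copy_to_user(uret, ret, ret_len);             /* push the packed return value back out */
        PORTAL_FREE(ret, ret_len);
        PORTAL_FREE(args, arg_len);
        return PTL_OK;
}

/*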
+ * As lgmanl exists entirely in kernel, just pass the arg block directly to + * the NAL CB, buy passing the args to lib_dispatch + * Arguments are + * nal_t nal Our nal + * int index the api function that initiated this call + * void *args packed block of function args + * size_t arg_len length of args block + * void *ret A return value for the API NAL + * size_t ret_len Size of the return value + * + */ + +int +lgmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len, + void *ret, size_t ret_len) +{ + + nal_cb_t *nal_cb = NULL; + lgmnal_data_t *nal_data = NULL; + + + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_forward: nal [%p], index [%d], args [%p], arglen [%d], ret [%p], retlen [%d]\n", nal, index, args, arg_len, ret, ret_len)); + + if (!nal || !args || (index < 0) || (arg_len < 0)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); +#ifdef LGMNAL_DEBUG + if (!nal) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No nal specified\n")); + if (!args) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No args specified\n")); + if (index < 0) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Index is negative[%d]\n", index)); + if (arg_len < 0) + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("arg_len is negative [%d]\n", arg_len)); +#endif + return (PTL_FAIL); + } + + if (ret && (ret_len <= 0)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Bad args to lgmnal_api_forward\n")); +#ifdef LGMNAL_DEBUG + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("ret_len is [%d]\n", ret_len)); +#endif + return (PTL_FAIL); + } + + + if (!nal->nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal, no nal data\n")); + return (PTL_FAIL); + } + + nal_data = nal->nal_data; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_data is [%p]\n", nal_data)); + + if (!nal_data->nal_cb) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad nal_data, no nal_cb\n")); + return (PTL_FAIL); + } + + nal_cb = nal_data->nal_cb; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("nal_cb is [%p]\n", nal_cb)); + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward calling lib_dispatch\n")); + lib_dispatch(nal_cb, NULL, index, args, ret); + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_api_forward returns from lib_dispatch\n")); + + return(PTL_OK); +} + + +/* + * lgmnal_api_shutdown + * Close down this interface and free any resources associated with it + * nal_t nal our nal to shutdown + */ +int +lgmnal_api_shutdown(nal_t *nal, int interface) +{ + + lgmnal_data_t *nal_data = nal->nal_data; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_shutdown: nal_data [%p]\n", nal_data)); + + /* + * TO DO lgmnal_api_shutdown what is to be done? 
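 *
 * (Illustration, not part of the original patch: one plausible answer to the
 * TO DO above would be to run the same teardown that lgmnal_fini() performs
 * later in this file; the patch itself currently does nothing here.)
 */

static void example_shutdown_teardown(lgmnal_data_t *nal_data)
{
        lgmnal_stop_rxthread(nal_data);
        lgmnal_free_stxd(nal_data);
        lgmnal_free_srxd(nal_data);
        LGMNAL_GM_LOCK(nal_data);
        gm_close(nal_data->gm_port);
        gm_finalize();
        LGMNAL_GM_UNLOCK(nal_data);
}

/*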
+ */ + + return(PTL_OK); +} + + +/* + * lgmnal_api_validate + * validate a user address for use in communications + * There's nothing to be done here + */ +int +lgmnal_api_validate(nal_t *nal, void *base, size_t extent) +{ + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_validate : nal [%p], base [%p], extent [%d]\n", nal, base, extent)); + + return(PTL_OK); +} + + + +/* + * lgmnal_api_yield + * Give up the processor + */ +void +lgmnal_api_yield(nal_t *nal) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_yield : nal [%p]\n", nal)); + + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + + return; +} + + + +/* + * lgmnal_api_lock + * Take a threadsafe lock + */ +void +lgmnal_api_lock(nal_t *nal, unsigned long *flags) +{ + + lgmnal_data_t *nal_data; + nal_cb_t *nal_cb; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flagsa [%p] flags[%ul]\n", nal, flags, *flags)); + + nal_data = nal->nal_data; + nal_cb = nal_data->nal_cb; + + nal_cb->cb_cli(nal_cb, flags); +/* + LGMNAL_API_LOCK(nal_data); +*/ + + return; +} + +/* + * lgmnal_api_unlock + * Release a threadsafe lock + */ +void +lgmnal_api_unlock(nal_t *nal, unsigned long *flags) +{ + lgmnal_data_t *nal_data; + nal_cb_t *nal_cb; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_api_lock : nal [%p], flags [%p]\n", nal, flags)); + + nal_data = nal->nal_data; + if (!nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal, no nal_data\n")); + } + nal_cb = nal_data->nal_cb; + if (!nal_cb) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_api_unlock bad nal_data, no nal_cb\n")); + } + + nal_cb->cb_sti(nal_cb, flags); +/* + LGMNAL_API_UNLOCK(nal_data); +*/ + + return; +} + + +nal_t * +lgmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, ptl_pid_t rpid) +{ + + nal_t *nal = NULL; + nal_cb_t *nal_cb = NULL; + lgmnal_data_t *nal_data = NULL; + lgmnal_srxd_t *srxd = NULL; + gm_status_t gm_status; + unsigned int local_nid = 0, global_nid = 0; + ptl_nid_t portals_nid; + ptl_pid_t portals_pid = 0; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_init : interface [%d], ptl_size [%d], ac_size[%d]\n", + interface, ptl_size, ac_size)); + + if ((interface < 0) || (interface > LGMNAL_NUM_IF) || (ptl_size <= 0) || (ac_size <= 0) ) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("bad args\n")); + return(NULL); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("parameters check out ok\n")); + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Acquired global lock\n")); + + + PORTAL_ALLOC(nal_data, sizeof(lgmnal_data_t)); + if (!nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't get memory\n")); + return(NULL); + } + memset(nal_data, 0, sizeof(lgmnal_data_t)); + /* + * set the small message buffer size + */ + nal_data->refcnt = 1; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_data[%p]\n", nal_data)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("small_msg_size is [%d]\n", nal_data->small_msg_size)); + + PORTAL_ALLOC(nal, sizeof(nal_t)); + if (!nal) { + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + return(NULL); + } + memset(nal, 0, sizeof(nal_t)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal[%p]\n", nal)); + + PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t)); + if (!nal_cb) { + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + return(NULL); + } + memset(nal_cb, 0, sizeof(nal_cb_t)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocd and reset nal_cb[%p]\n", nal_cb)); + + LGMNAL_INIT_NAL(nal); + LGMNAL_INIT_NAL_CB(nal_cb); + /* + * String them all together + */ + nal->nal_data = (void*)nal_data; + nal_cb->nal_data = 
(void*)nal_data; + nal_data->nal = nal; + nal_data->nal_cb = nal_cb; + + LGMNAL_API_LOCK_INIT(nal_data); + LGMNAL_CB_LOCK_INIT(nal_data); + LGMNAL_GM_LOCK_INIT(nal_data); + + + /* + * initialise the interface, + */ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling gm_init\n")); + if (gm_init() != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("call to gm_init failed\n")); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_open with interface [%d], port [%d], name [%s], version [%d]\n", interface, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION)); + + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_open(&nal_data->gm_port, 0, LGMNAL_GM_PORT, "lgmnal", GM_API_VERSION); + LGMNAL_GM_UNLOCK(nal_data); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open returned [%d]\n", gm_status)); + if (gm_status == GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_open succeeded port[%p]\n", nal_data->gm_port)); + } else { + switch(gm_status) { + case(GM_INVALID_PARAMETER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Invalid Parameter\n")); + break; + case(GM_BUSY): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. GM Busy\n")); + break; + case(GM_NO_SUCH_DEVICE): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. No such device\n")); + break; + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Incompatile lib and driver\n")); + break; + case(GM_OUT_OF_MEMORY): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Out of Memory\n")); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_open Failure. Unknow error code [%d]\n", gm_status)); + break; + } + LGMNAL_GM_LOCK(nal_data); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + nal_data->small_msg_size = lgmnal_small_msg_size; + nal_data->small_msg_gmsize = gm_min_size_for_length(lgmnal_small_msg_size); + + if (lgmnal_alloc_srxd(nal_data) != LGMNAL_STATUS_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small rx descriptors\n")); + lgmnal_free_stxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + + /* + * Hang out a bunch of small receive buffers + * In fact hang them all out + */ + while((srxd = lgmnal_get_srxd(nal_data, 0))) { + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("giving [%p] to gm_provide_recvive_buffer\n", srxd->buffer)); + LGMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0); + LGMNAL_GM_UNLOCK(nal_data); + } + + /* + * Allocate pools of small tx buffers and descriptors + */ + if (lgmnal_alloc_stxd(nal_data) != LGMNAL_STATUS_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to allocate small tx descriptors\n")); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + + /* + * Start the recieve thread + * Initialise the gm_alarm we will use to wake the thread is + * it needs to be stopped + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Initializing receive thread alarm 
and flag\n")); + gm_initialize_alarm(&nal_data->rxthread_alarm); + nal_data->rxthread_flag = LGMNAL_THREAD_START; + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Starting receive thread\n")); + nal_data->rxthread_pid = kernel_thread(lgmnal_receive_thread, (void*)nal_data, 0); + if (nal_data->rxthread_pid <= 0) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Receive thread failed to start\n")); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + while (nal_data->rxthread_flag != LGMNAL_THREAD_STARTED) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1024); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Waiting for receive thread signs of life\n")); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("receive thread seems to have started\n")); + nal_data->rxthread_flag = LGMNAL_THREAD_CONTINUE; + + + + /* + * Initialise the portals library + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Getting node id\n")); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_get_node_id(nal_data->gm_port, &local_nid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + lgmnal_stop_rxthread(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("can't determine node id\n")); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + nal_data->gm_local_nid = local_nid; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%u]\n", local_nid)); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid, &global_nid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to obtain global id\n")); + lgmnal_stop_rxthread(nal_data); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node id is [%u][%x]\n", global_nid)); + nal_data->gm_global_nid = global_nid; + +/* + pid = gm_getpid(); +*/ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_pid is [%u]\n", portals_pid)); + portals_nid = (unsigned long)global_nid; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_nid is [%lu]\n", portals_nid)); + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_init\n")); + if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size, ac_size) != PTL_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lib_init failed\n")); + lgmnal_stop_rxthread(nal_data); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); + return(NULL); + + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_init finished\n")); + global_nal_data = nal->nal_data; + return(nal); +} + + + +/* + * Called when module removed + */ +void lgmnal_fini() +{ + lgmnal_data_t *nal_data = global_nal_data; + nal_t *nal = nal_data->nal; + nal_cb_t 
*nal_cb = nal_data->nal_cb; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_fini\n")); + + PtlNIFini(lgmnal_ni); + lib_fini(nal_cb); + + lgmnal_stop_rxthread(nal_data); + lgmnal_free_stxd(nal_data); + lgmnal_free_srxd(nal_data); + LGMNAL_GM_LOCK(nal_data); + gm_close(nal_data->gm_port); + gm_finalize(); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(nal, sizeof(nal_t)); + PORTAL_FREE(nal_data, sizeof(lgmnal_data_t)); + PORTAL_FREE(nal_cb, sizeof(nal_cb_t)); +} + +EXPORT_SYMBOL(lgmnal_init); +EXPORT_SYMBOL(lgmnal_fini); +EXPORT_SYMBOL(lgmnal_api_forward); +EXPORT_SYMBOL(lgmnal_api_validate); +EXPORT_SYMBOL(lgmnal_api_yield); +EXPORT_SYMBOL(lgmnal_api_lock); +EXPORT_SYMBOL(lgmnal_api_unlock); +EXPORT_SYMBOL(lgmnal_api_shutdown); diff --git a/lustre/portals/knals/lgmnal/lgmnal_cb.c b/lustre/portals/knals/lgmnal/lgmnal_cb.c new file mode 100644 index 0000000..bb231af --- /dev/null +++ b/lustre/portals/knals/lgmnal/lgmnal_cb.c @@ -0,0 +1,248 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + + +/* + * This file implements the nal cb functions + */ + + +#include "lgmnal.h" + +int lgmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) +{ + lgmnal_srxd_t *srxd = (lgmnal_srxd_t*)private; + int status = PTL_OK; + lgmnal_data_t *nal_data = nal_cb->nal_data; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv nal_cb [%p],private[%p], cookie[%p], niov[%d], iov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, niov, iov, mlen, rlen)); + + if (srxd->type == LGMNAL_SMALL_MESSAGE) { + if (!LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, mlen)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_cb_recv. 
This is not a small message\n"));
+		}
+		status = lgmnal_small_receive2(nal_cb, private, cookie, niov, iov, mlen, rlen);
+	}
+
+	LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_cb_recv returning status [%d]\n", status));
+	return(status);
+}
+
+int lgmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int kniov, ptl_kiov_t *kiov, size_t mlen, size_t rlen)
+{
+	lgmnal_srxd_t	*srxd = (lgmnal_srxd_t*)private;
+	int		status = PTL_OK;
+	struct iovec	*iovec = NULL;
+	int		i = 0;
+
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_recv_pages nal_cb [%p], private[%p], cookie[%p], kniov[%d], kiov [%p], mlen[%d], rlen[%d]\n", nal_cb, private, cookie, kniov, kiov, mlen, rlen));
+
+	if (srxd->type == LGMNAL_SMALL_MESSAGE) {
+		PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
+		if (!iovec) {
+			LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't malloc iovec array\n"));
+			return(LGMNAL_STATUS_FAIL);
+		}
+
+		/*
+		 * map each page and create an iovec for it
+		 */
+		for (i = 0; i < kniov; i++) {
+			LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("kiov [%d] page [%p] len [%d] offset[%d]\n", i, kiov[i].kiov_page, kiov[i].kiov_len, kiov[i].kiov_offset));
+			iovec[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
+			iovec[i].iov_len = kiov[i].kiov_len;
+			LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("iov_base is [%p]\n", iovec[i].iov_base));
+		}
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("calling lgmnal_small_receive2\n"));
+		status = lgmnal_small_receive2(nal_cb, private, cookie, kniov, iovec, mlen, rlen);
+		PORTAL_FREE(iovec, sizeof(struct iovec)*kniov);
+	}
+
+	LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_cb_recv_pages returning status [%d]\n", status));
+	return(status);
+}
+
+
+int lgmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr,
+	int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, size_t len)
+{
+
+	lgmnal_data_t	*nal_data;
+
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_send nid [%lu] niov[%d] len[%d]\n", nid, niov, len));
+	nal_data = nal_cb->nal_data;
+
+	if (LGMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n"));
+		lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len);
+	} else {
+		LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send, it is not supported yet\n"));
+/*
+		lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len);
+*/
+		return(LGMNAL_STATUS_FAIL);
+	}
+	return(PTL_OK);
+}
+
+int lgmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr,
+	int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len)
+{
+
+	int		i = 0;
+	lgmnal_data_t	*nal_data;
+	struct iovec	*iovec;
+
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_send_pages nid [%lu] kniov[%d] len[%d]\n", nid, kniov, len));
+	nal_data = nal_cb->nal_data;
+	if (LGMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
+		/* TO DO fix small message for send pages */
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is a small message send\n"));
+		PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
+		if (!iovec) {
+			LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't malloc iovec array\n"));
+			return(LGMNAL_STATUS_FAIL);
+		}
+
+		/*
+		 * map each page and create an iovec for it
+		 */
+		for (i = 0; i < kniov; i++) {
+			LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("kiov [%d] page [%p] len [%d] offset[%d]\n", i, kiov[i].kiov_page, kiov[i].kiov_len, kiov[i].kiov_offset));
+			iovec[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
+			iovec[i].iov_len = kiov[i].kiov_len;
+		}
+		lgmnal_small_transmit(nal_cb, private, cookie, hdr, type, nid, pid, kniov, iovec, len);
+		PORTAL_FREE(iovec, kniov*sizeof(struct iovec));
+	} else {
+		LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("This is a large message send, it is not supported yet\n"));
+/*
+		lgmnal_large_transmit1(nal_cb, private, cookie, hdr, type, nid, pid, niov, iov, len);
+*/
+		return(LGMNAL_STATUS_FAIL);
+	}
+	return(PTL_OK);
+}
+
+int lgmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst, user_ptr src, size_t len)
+{
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_read dst [%p] src [%p] len[%d]\n", dst, src, len));
+	gm_bcopy(src, dst, len);
+	return(PTL_OK);
+}
+
+int lgmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst, void *src, size_t len)
+{
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_write :: dst [%p] src [%p] len[%d]\n", dst, src, len));
+	gm_bcopy(src, dst, len);
+	return(PTL_OK);
+}
+
+int lgmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq, ptl_event_t *ev)
+{
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_callback nal_cb[%p], private[%p], eq[%p], ev[%p]\n", nal_cb, private, eq, ev));
+
+	if (eq->event_callback != NULL) {
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("found callback\n"));
+		eq->event_callback(ev);
+	}
+
+	return(PTL_OK);
+}
+
+void *lgmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
+{
+	void *ptr = NULL;
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_malloc len[%d]\n", len));
+	PORTAL_ALLOC(ptr, len);
+	return(ptr);
+}
+
+void lgmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len)
+{
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_free :: buf[%p] len[%d]\n", buf, len));
+	PORTAL_FREE(buf, len);
+	return;
+}
+
+void lgmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void **addrkey)
+{
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_unmap niov[%d] iov[%p], addrkey[%p]\n", niov, iov, addrkey));
+	return;
+}
+
+int lgmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov, void **addrkey)
+{
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_map niov[%d], iov[%p], addrkey[%p]\n", niov, iov, addrkey));
+	return(PTL_OK);
+}
+
+void lgmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...)
+{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_printf\n")); + lgmnal_print(fmt); + return; +} + +void lgmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags) +{ + lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; + spinlock_t cb_lock = nal_data->cb_lock; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_cli\n")); +/* + local_irq_save(*flags); + spin_lock_irqsave(&cb_lock, *flags); +*/ + spin_lock(&cb_lock); + return; +} + +void lgmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags) +{ + lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; + spinlock_t cb_lock = nal_data->cb_lock; + +/* + local_irq_restore(*flags); + spin_unlock_irqrestore(&cb_lock, *flags); +*/ + spin_unlock(&cb_lock); + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_sti\n")); + return; +} + +int lgmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cb_dist\n")); + if (dist) + *dist = 27; + return(PTL_OK); +} + + + + +EXPORT_SYMBOL(lgmnal_cb_send); +EXPORT_SYMBOL(lgmnal_cb_send_pages); +EXPORT_SYMBOL(lgmnal_cb_recv); +EXPORT_SYMBOL(lgmnal_cb_recv_pages); +EXPORT_SYMBOL(lgmnal_cb_read); +EXPORT_SYMBOL(lgmnal_cb_write); +EXPORT_SYMBOL(lgmnal_cb_cli); +EXPORT_SYMBOL(lgmnal_cb_sti); +EXPORT_SYMBOL(lgmnal_cb_dist); +EXPORT_SYMBOL(lgmnal_cb_printf); +EXPORT_SYMBOL(lgmnal_cb_map); +EXPORT_SYMBOL(lgmnal_cb_unmap); +EXPORT_SYMBOL(lgmnal_cb_callback); +EXPORT_SYMBOL(lgmnal_cb_free); +EXPORT_SYMBOL(lgmnal_cb_malloc); diff --git a/lustre/portals/knals/lgmnal/lgmnal_comm.c b/lustre/portals/knals/lgmnal/lgmnal_comm.c new file mode 100644 index 0000000..091f665 --- /dev/null +++ b/lustre/portals/knals/lgmnal/lgmnal_comm.c @@ -0,0 +1,464 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. + */ + +/* + * This file contains all lgmnal send and receive functions + */ + +#include "lgmnal.h" + +int +lgmnal_requeue_rxbuffer(lgmnal_data_t *nal_data, lgmnal_srxd_t *srxd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_requeue_rxbuffer\n")); + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data)); + + LGMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, + srxd->gmsize, GM_LOW_PRIORITY, 0 ); + LGMNAL_GM_UNLOCK(nal_data); + + return(LGMNAL_STATUS_OK); +} + + +/* + * Handle a bad message + * A bad message is one we don't expect or can't interpret + */ +int +lgmnal_badrx_message(lgmnal_data_t *nal_data, gm_recv_t *recv, lgmnal_srxd_t *srxd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Can't handle message\n")); + + if (!srxd) + srxd = lgmnal_rxbuffer_to_srxd(nal_data, gm_ntohp(recv->buffer)); + if (srxd) { + lgmnal_requeue_rxbuffer(nal_data, srxd); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Can't find a descriptor for this buffer\n")); + /* + * get rid of it ? + */ + return(LGMNAL_STATUS_FAIL); + } + + return(LGMNAL_STATUS_OK); +} + + +/* + * Start processing a small message receive + * Get here from lgmnal_receive_thread + * Hand off to lib_parse, which calls cb_recv + * which hands back to lgmnal_small_receive2 + * Deal with all endian stuff here (if we can!) 
+ */ +int +lgmnal_small_receive1(lgmnal_data_t *nal_data, gm_recv_t *recv) +{ + lgmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + unsigned int snode, sport, type, length; + lgmnal_msghdr_t *lgmnal_msghdr; + ptl_hdr_t *portals_hdr; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive1 nal_data [%p], recv [%p]\n", nal_data, recv)); + + buffer = gm_ntohp(recv->buffer);; + snode = (int)gm_ntoh_u16(recv->sender_node_id); + sport = (int)gm_ntoh_u8(recv->sender_port_id); + type = (int)gm_ntoh_u8(recv->type); + buffer = gm_ntohp(recv->buffer); + length = (int) gm_ntohl(recv->length); + + lgmnal_msghdr = (lgmnal_msghdr_t*)buffer; + portals_hdr = (ptl_hdr_t*)(buffer+LGMNAL_MSGHDR_SIZE); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("rx_event:: Sender node [%d], Sender Port [%d], type [%d], length [%d], buffer [%p]\n", + snode, sport, type, length, buffer)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_msghdr:: Sender node [%u], magic [%lx], type [%d]\n", + lgmnal_msghdr->sender_node_id, lgmnal_msghdr->magic, lgmnal_msghdr->type)); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: Sender node [%ul], dest_node [%ul]\n", + portals_hdr->src_nid, portals_hdr->dest_nid)); + + + /* + * Get a transmit descriptor for this message + */ + srxd = lgmnal_rxbuffer_to_srxd(nal_data, buffer); + LGMNAL_PRINT(LGMNAL_DEBUG, ("Back from lgmnal_rxbuffer_to_srxd\n")); + if (!srxd) { + LGMNAL_PRINT(LGMNAL_DEBUG, ("Failed to get receive descriptor for this buffer\n")); + lib_parse(nal_data->nal_cb, portals_hdr, srxd); + return(LGMNAL_STATUS_FAIL); + } + srxd->type = LGMNAL_SMALL_MESSAGE; + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling lib_parse buffer is [%p]\n", buffer+LGMNAL_MSGHDR_SIZE)); + /* + * control passes to lib, which calls cb_recv + * cb_recv is responsible for returning the buffer + * for future receive + */ + lib_parse(nal_data->nal_cb, portals_hdr, srxd); + + return(LGMNAL_STATUS_OK); +} + +/* + * Get here from lgmnal_receive_thread, lgmnal_small_receive1 + * lib_parse, cb_recv + * Put data from prewired receive buffer into users buffer(s) + * Hang out the receive buffer again for another receive + * Call lib_finalize + */ +int +lgmnal_small_receive2(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, unsigned int niov, + struct iovec *iov, size_t mlen, size_t rlen) +{ + lgmnal_srxd_t *srxd = NULL; + void *buffer = NULL; + lgmnal_data_t *nal_data = (lgmnal_data_t*)nal_cb->nal_data; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_receive2 niov [%d] mlen[%d]\n", niov, mlen)); + + if (!private) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_small_receive2 no context\n")); + lib_finalize(nal_cb, private, cookie); + return(PTL_FAIL); + } + + srxd = (lgmnal_srxd_t*)private; + buffer = srxd->buffer; + buffer += sizeof(lgmnal_msghdr_t); + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing [%p] len [%d]\n", iov, iov->iov_len)); + gm_bcopy(buffer, iov->iov_base, iov->iov_len); + buffer += iov->iov_len; + iov++; + } + + + /* + * let portals library know receive is complete + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling lib_finalize\n")); + if (lib_finalize(nal_cb, private, cookie) != PTL_OK) { + /* TO DO what to do with failed lib_finalise? 
*/ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lib_finalize failed\n")); + } + /* + * return buffer so it can be used again + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("calling gm_provide_receive_buffer\n")); + LGMNAL_GM_LOCK(nal_data); + gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, srxd->gmsize, GM_LOW_PRIORITY, 0); + LGMNAL_GM_UNLOCK(nal_data); + + return(PTL_OK); +} + + + +/* + * The recevive thread + * This guy wait in gm_blocking_recvive and gets + * woken up when the myrinet adaptor gets an interrupt. + * Hands off processing of small messages and blocks again + */ +int +lgmnal_receive_thread(void *arg) +{ + lgmnal_data_t *nal_data; + gm_recv_event_t *rxevent = NULL; + gm_recv_t *recv = NULL; + void *buffer; + + if (!arg) { + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread. NO nal_data. Exiting\n", arg)); + return(-1); + } + + nal_data = (lgmnal_data_t*)arg; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("RXTHREAD:: This is the lgmnal_receive_thread nal_data is [%p]\n", arg)); + + nal_data->rxthread_flag = LGMNAL_THREAD_STARTED; + while (nal_data->rxthread_flag == LGMNAL_THREAD_STARTED) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: lgmnal_receive_threads waiting for LGMNAL_CONTINUE flag\n")); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1024); + + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: calling daemonize\n")); + daemonize(); + LGMNAL_GM_LOCK(nal_data); + while(nal_data->rxthread_flag == LGMNAL_THREAD_CONTINUE) { + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Receive thread waiting\n")); + rxevent = gm_blocking_receive_no_spin(nal_data->gm_port); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: receive thread got [%s]\n", lgmnal_rxevent(rxevent))); + if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Receive thread time to exit\n")); + break; + } + switch (GM_RECV_EVENT_TYPE(rxevent)) { + + case(GM_RECV_EVENT): + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: GM_RECV_EVENT\n")); + recv = (gm_recv_t*)&(rxevent->recv); + buffer = gm_ntohp(recv->buffer); + if (((lgmnal_msghdr_t*)buffer)->type == LGMNAL_SMALL_MESSAGE) { + LGMNAL_GM_UNLOCK(nal_data); + lgmnal_small_receive1(nal_data, recv); + LGMNAL_GM_LOCK(nal_data); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: Unsupported message type\n")); + lgmnal_badrx_message(nal_data, recv, NULL); + } + break; + case(_GM_SLEEP_EVENT): + /* + * Blocking receive above just returns + * immediatly with _GM_SLEEP_EVENT + * Don't know what this is + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Sleeping in gm_unknown\n")); + LGMNAL_GM_UNLOCK(nal_data); + gm_unknown(nal_data->gm_port, rxevent); + LGMNAL_GM_LOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Awake from gm_unknown\n")); + break; + + default: + /* + * Don't know what this is + * gm_unknown will make sense of it + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("RXTHREAD:: Passing event to gm_unknown\n")); + gm_unknown(nal_data->gm_port, rxevent); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RXTHREAD:: Processed unknown event\n")); + + } + + + } + LGMNAL_GM_UNLOCK(nal_data); + nal_data->rxthread_flag = LGMNAL_THREAD_STOPPED; + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("RXTHREAD:: The lgmnal_receive_thread nal_data [%p] is exiting\n", nal_data)); + return(LGMNAL_STATUS_OK); +} + + +int +lgmnal_small_transmit(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, + ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) +{ + lgmnal_data_t *nal_data = 
(lgmnal_data_t*)nal_cb->nal_data; + lgmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + lgmnal_msghdr_t *msghdr = NULL; + int tot_size = 0; + unsigned int local_nid; + gm_status_t gm_status = GM_SUCCESS; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_small_transmit nal_cb [%p] private [%p] cookie [%p] hdr [%p] type [%d] global_nid [%u][%x] pid [%d] niov [%d] iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type, global_nid, global_nid, pid, niov, iov, size)); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("portals_hdr:: dest_nid [%lu], src_nid [%lu]\n", hdr->dest_nid, hdr->src_nid)); + + if (!nal_data) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_data\n")); + return(LGMNAL_STATUS_FAIL); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("nal_data [%p]\n", nal_data)); + } + + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, &local_nid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to obtain local id\n")); + return(LGMNAL_STATUS_FAIL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local Node_id is [%u][%x]\n", local_nid, local_nid)); + + stxd = lgmnal_get_stxd(nal_data, 1); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); + + stxd->type = LGMNAL_SMALL_MESSAGE; + stxd->cookie = cookie; + + /* + * Copy lgmnal_msg_hdr and portals header to the transmit buffer + * Then copy the data in + */ + buffer = stxd->buffer; + msghdr = (lgmnal_msghdr_t*)buffer; + + msghdr->magic = LGMNAL_MAGIC; + msghdr->type = LGMNAL_SMALL_MESSAGE; + msghdr->sender_node_id = nal_data->gm_global_nid; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); + + buffer += sizeof(lgmnal_msghdr_t); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Advancing buffer pointer by [%x] to [%p]\n", sizeof(lgmnal_msghdr_t), buffer)); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + + buffer += sizeof(ptl_hdr_t); + + while(niov--) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing iov [%p] len [%d] to [%p]\n", iov, iov->iov_len, buffer)); + gm_bcopy(iov->iov_base, buffer, iov->iov_len); + buffer+= iov->iov_len; + iov++; + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending\n")); + tot_size = size+sizeof(ptl_hdr_t)+sizeof(lgmnal_msghdr_t); + + + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_send_to_peer port [%p] buffer [%p] gmsize [%d] msize [%d] global_nid [%u][%x] local_nid[%d] stxd [%p]\n", + nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, global_nid, global_nid, local_nid, stxd)); + LGMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, tot_size, GM_LOW_PRIORITY, local_nid, lgmnal_small_tx_done, (void*)stxd); + + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); + + return(PTL_OK); +} + + +void +lgmnal_small_tx_done(gm_port_t *gm_port, void *context, gm_status_t status) +{ + lgmnal_stxd_t *stxd = (lgmnal_stxd_t*)context; + lib_msg_t *cookie = stxd->cookie; + lgmnal_data_t *nal_data = (lgmnal_data_t*)stxd->nal_data; + nal_cb_t *nal_cb = nal_data->nal_cb; + + if (!stxd) { + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("send completion event for unknown stxd\n")); + return; + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Result of send stxd [%p] is [%s]\n", stxd, lgmnal_gm_error(status))); + /* TO DO figure out which sends are worth retrying and get a send token to retry */ + if (lib_finalize(nal_cb, stxd, cookie) != PTL_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Call to lib_finalize failed for stxd [%p]\n", stxd)); + } + 
lgmnal_return_stxd(nal_data, stxd); + return; +} + + +void +lgmnal_large_tx1_done(gm_port_t *gm_port, void *context, gm_status_t status) +{ + +} + +/* + * Begin a large transmit + */ +int +lgmnal_large_transmit1(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie, ptl_hdr_t *hdr, int type, + ptl_nid_t global_nid, ptl_pid_t pid, unsigned int niov, struct iovec *iov, int size) +{ + + lgmnal_data_t *nal_data; + lgmnal_stxd_t *stxd = NULL; + void *buffer = NULL; + lgmnal_msghdr_t *msghdr = NULL; + unsigned int local_nid; + int mlen = 0; /* the size of the init message data */ + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_large_transmit1 nal_cb [%p] private [%p], cookie [%p] hdr [%p], type [%d] global_nid [%u], pid [%d], + niov [%d], iov [%p], size [%d]\n", + nal_cb, private, cookie, hdr, type, global_nid, pid, niov, iov, size)); + + if (nal_cb) + nal_data = (lgmnal_data_t*)nal_cb->nal_data; + else { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("no nal_cb.\n")); + return(LGMNAL_STATUS_FAIL); + } + + + /* + * TO DO large transmit uses stxd. Should it have control descriptor? + */ + stxd = lgmnal_get_stxd(nal_data, 1); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("stxd [%p]\n", stxd)); + + stxd->type = LGMNAL_LARGE_MESSAGE_INIT; + stxd->cookie = cookie; + + /* + * Copy lgmnal_msg_hdr and portals header to the transmit buffer + * Then copy the iov in + */ + buffer = stxd->buffer; + msghdr = (lgmnal_msghdr_t*)buffer; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing msghdr at [%p]\n", buffer)); + + msghdr->magic = LGMNAL_MAGIC; + msghdr->type = LGMNAL_LARGE_MESSAGE_INIT; + msghdr->sender_node_id = nal_data->gm_global_nid; + msghdr->stxd = stxd; + buffer += sizeof(lgmnal_msghdr_t); + mlen = sizeof(lgmnal_msghdr_t); + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("processing portals hdr at [%p]\n", buffer)); + + gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t)); + buffer += sizeof(ptl_hdr_t); + mlen += sizeof(ptl_hdr_t); + + /* + * Store the iovs in the stxd for we can get them later + * in large_transmit2 + */ + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Copying iov [%p] to [%p]\n", iov, stxd->iov)); + gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec)); + stxd->niov = niov; + + /* + * Send the init message to the target + */ + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("sending mlen [%d]\n", mlen)); + LGMNAL_GM_LOCK(nal_data); + gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, stxd->gmsize, mlen, GM_LOW_PRIORITY, local_nid, lgmnal_large_tx1_done, (void*)stxd); + LGMNAL_GM_UNLOCK(nal_data); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("done\n")); + + return(PTL_OK); +} + + + + +EXPORT_SYMBOL(lgmnal_requeue_rxbuffer); +EXPORT_SYMBOL(lgmnal_badrx_message); +EXPORT_SYMBOL(lgmnal_large_tx1_done); +EXPORT_SYMBOL(lgmnal_large_transmit1); +EXPORT_SYMBOL(lgmnal_small_receive1); +EXPORT_SYMBOL(lgmnal_small_receive2); +EXPORT_SYMBOL(lgmnal_receive_thread); +EXPORT_SYMBOL(lgmnal_small_transmit); +EXPORT_SYMBOL(lgmnal_small_tx_done); diff --git a/lustre/portals/knals/lgmnal/lgmnal_module.c b/lustre/portals/knals/lgmnal/lgmnal_module.c new file mode 100644 index 0000000..51383fc --- /dev/null +++ b/lustre/portals/knals/lgmnal/lgmnal_module.c @@ -0,0 +1,127 @@ +/* + * This program was prepared by the Regents of the University of + * California at Los Alamos National Laboratory (the University) under + * contract number W-7405-ENG-36 with the U.S. Department of Energy + * (DoE). Neither the U.S. Government nor the + * University makes any warranty, express or implied, or assumes any + * liability or responsibility for the use of this software. 
+ */ + + +#include "lgmnal.h" + + +ptl_handle_ni_t lgmnal_ni; + + +int +lgmnal_cmd(struct portal_ioctl_data *data, void *private) +{ + lgmnal_data_t *nal_data = NULL; + char *name = NULL; + int nid = -2; + int gnid; + gm_status_t gm_status; + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_cmd [d] private [%p]\n", data->ioc_nal_cmd, private)); + nal_data = (lgmnal_data_t*)private; + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("nal_data is [%p]\n", nal_data)); + switch(data->ioc_nal_cmd) { + /* + * just reuse already defined GET_NID. Should define LGMNAL version + */ + case(LGMNAL_IOC_GET_GNID): + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("lgmnal_cmd GETNID (Get GM Global Network Id\n")); + + PORTAL_ALLOC(name, data->ioc_plen1); + copy_from_user(name, data->ioc_pbuf1, data->ioc_plen1); + + LGMNAL_GM_LOCK(nal_data); + nid = gm_host_name_to_node_id(nal_data->gm_port, name); + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Local node id is [%d]\n", nid)); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_node_id_to_global_id(nal_data->gm_port, nid, &gnid); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_node_id_to_global_id failed\n", gm_status)); + return(-1); + } + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Global node is is [%u][%x]\n", gnid, gnid)); + copy_to_user(data->ioc_pbuf2, &gnid, data->ioc_plen2); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_cmd UNKNOWN[%d]\n", data->ioc_nal_cmd)); + data->ioc_nid2 = -1; + } + + + return(0); +} + +int lgmnal_small_msg_size = 81920; +int lgmnal_debug_level = 1; + +int +init_module() +{ + int status; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("This is the lgmnal module initialisation routine\n")); + + + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling lgmnal_init\n")); + status = PtlNIInit(lgmnal_init, 32, 4, 0, &lgmnal_ni); + if (status == PTL_OK) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL initialised ok lgmnal_ni [%lx]\n", lgmnal_ni)); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Portals LGMNAL Failed to initialise\n")); + return(1); + + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling kportal_nal_register\n")); + /* + * global_nal_data is set by lgmnal_init + */ + if (kportal_nal_register(LGMNAL, &lgmnal_cmd, global_nal_data) != 0) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("kportal_nal_register failed\n")); + return(1); + } + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Calling PORTAL_SYMBOL_REGISTER\n")); + PORTAL_SYMBOL_REGISTER(lgmnal_ni); + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("This is the end of the lgmnal module initialisation routine")); + + + return(0); +} + + +void cleanup_module() +{ + int interface=0; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Cleaning up lgmnal module")); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Interface [%d] Calling shutdown\n", interface)); + kportal_nal_unregister(LGMNAL); + PORTAL_SYMBOL_UNREGISTER(lgmnal_ni); + lgmnal_fini(); + global_nal_data = NULL; + return; +} + + +EXPORT_SYMBOL(lgmnal_ni); +EXPORT_SYMBOL(lgmnal_debug_level); + +MODULE_PARM(lgmnal_small_msg_size, "i"); +MODULE_PARM(lgmnal_debug_level, "i"); + +MODULE_AUTHOR("Morgan Doyle. morgan.doyle@hp.com"); + +MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM2. 
[0= DEFAULT_LEN) { + PORTAL_ALLOC(varbuf, len+1+8); + if (!varbuf) { + printk("lgmnal_cb_printf Failed to malloc\n"); + printk("Truncated message is\n"); + printk(fixedbuf); + va_end(ap); + return; + } + sprintf(varbuf, "LGMNAL::"); + len = vsnprintf(varbuf+8, len+1, fmt, ap); + } else { + varbuf = fixedbuf; + } + va_end(ap); + printk(varbuf); + if (fixedbuf != varbuf) + PORTAL_FREE(varbuf, len+1+8); + return; +} + + +/* + * allocate a number of small tx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to send function. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the txd spinlock. + */ +int +lgmnal_alloc_stxd(lgmnal_data_t *nal_data) +{ + int ntx = 0, nstx = 0, i = 0; + lgmnal_stxd_t *txd = NULL; + void *txbuffer = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small tx\n")); + + LGMNAL_GM_LOCK(nal_data); + ntx = gm_num_send_tokens(nal_data->gm_port); + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of send tokens available is [%d]\n", ntx)); + + nstx = ntx/2; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] send tokens to small messages\n", nstx)); + + +#ifdef LGMNAL_USE_GM_HASH + nal_data->stxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nstx, 0); + if (!nal_data->srxd_hash) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n\n")); + return(LGMNAL_STATUS_NOMEM); + } +#else + nal_data->stxd_hash = NULL; +#endif + + /* + * A semaphore is initialised with the + * number of transmit tokens available. + * To get a stxd, acquire the token semaphore. 
+ * this decrements the available token count + * (if no tokens you block here, someone returning a + * stxd will release the semaphore and wake you) + * When token is obtained acquire the spinlock + * to manipulate the list + */ + LGMNAL_TXD_TOKEN_INIT(nal_data, nstx); + LGMNAL_TXD_LOCK_INIT(nal_data); + + for (i=0; i<=nstx; i++) { + PORTAL_ALLOC(txd, sizeof(lgmnal_stxd_t)); + if (!txd) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txd [%d]\n", i)); + return(LGMNAL_STATUS_NOMEM); + } +#if 0 + PORTAL_ALLOC(txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + if (!txbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); + return(LGMNAL_STATUS_FAIL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] txbuffer [%p], size [%d]\n", + nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, txbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", txbuffer, i)); + switch(gm_status) { + case(GM_FAILURE): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); + break; + case(GM_PERMISSION_DENIED): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); + break; + case(GM_INVALID_PARAMETER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown error\n")); + break; + } + return(LGMNAL_STATUS_FAIL); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("gm_register_memory ok for buffer [%p], index [%d]\n", txbuffer, i)); + } +#else + LGMNAL_GM_LOCK(nal_data); + txbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (!txbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc txbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(txd, sizeof(lgmnal_stxd_t)); + return(LGMNAL_STATUS_FAIL); + } +#endif + + txd->buffer = txbuffer; + txd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); + txd->gmsize = gm_min_size_for_length(txd->size); + txd->nal_data = (struct _lgmnal_data_t*)nal_data; + + if (lgmnal_hash_add(&nal_data->stxd_hash, (void*)txbuffer, (void*)txd)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry\n")); + return(LGMNAL_STATUS_FAIL); + } + + + txd->next = nal_data->stxd; + nal_data->stxd = txd; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); + } + + return(LGMNAL_STATUS_OK); +} + +/* Free the list of wired and gm_registered small tx buffers and the tx descriptors + that go along with them. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the txd spinlock. 
+ */ +void +lgmnal_free_stxd(lgmnal_data_t *nal_data) +{ + lgmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small tx\n")); + + while(txd) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing txd [%p] with buffer [%p], size [%d]\n", txd, txd->buffer, txd->size)); + _txd = txd; + txd = txd->next; +#if 0 + LGMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, _txd->buffer, _txd->size); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_txd->buffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); +#else + LGMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _txd->buffer); + LGMNAL_GM_UNLOCK(nal_data); +#endif + PORTAL_FREE(_txd, sizeof(lgmnal_stxd_t)); + } + return; +} + + +/* + * Get a txd from the list + * This get us a wired and gm_registered small tx buffer. + * This implicitly gets us a send token also. + */ +lgmnal_stxd_t * +lgmnal_get_stxd(lgmnal_data_t *nal_data, int block) +{ + + lgmnal_stxd_t *txd = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_stxd nal_data [%p] block[%d]\n", + nal_data, block)); + + if (block) { + LGMNAL_TXD_GETTOKEN(nal_data); + } else { + if (LGMNAL_TXD_TRYGETTOKEN(nal_data)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_stxd can't get token\n")); + return(NULL); + } + } + LGMNAL_TXD_LOCK(nal_data); + txd = nal_data->stxd; + if (txd) + nal_data->stxd = txd->next; + LGMNAL_TXD_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_stxd got [%p], head is [%p]\n", txd, nal_data->stxd)); + return(txd); +} + +/* + * Return a txd to the list + */ +void +lgmnal_return_stxd(lgmnal_data_t *nal_data, lgmnal_stxd_t *txd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_stxd nal_data [%p], txd[%p]\n", nal_data, txd)); + + LGMNAL_TXD_LOCK(nal_data); + txd->next = nal_data->stxd; + nal_data->stxd = txd; + LGMNAL_TXD_UNLOCK(nal_data); + LGMNAL_TXD_RETURNTOKEN(nal_data); + return; +} + + +/* + * allocate a number of small rx buffers and register with GM + * so they are wired and set up for DMA. This is a costly operation. + * Also allocate a corrosponding descriptor to keep track of + * the buffer. + * Put all descriptors on singly linked list to be available to receive thread. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the rxd spinlock. 
+ */ +int +lgmnal_alloc_srxd(lgmnal_data_t *nal_data) +{ + int nrx = 0, nsrx = 0, i = 0; + lgmnal_srxd_t *rxd = NULL; + void *rxbuffer = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_alloc_small rx\n")); + + LGMNAL_GM_LOCK(nal_data); + nrx = gm_num_receive_tokens(nal_data->gm_port); + LGMNAL_GM_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("total number of receive tokens available is [%d]\n", nrx)); + + nsrx = nrx/2; + + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Allocated [%d] receive tokens to small messages\n", nsrx)); + + +#ifdef LGMNAL_USE_GM_HASH + LGMNAL_GM_LOCK(nal_data); + nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs, gm_hash_hash_ptr, 0, sizeof(void*), nsrx, 0); + LGMNAL_GM_UNLOCK(nal_data); + if (!nal_data->srxd_hash) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to create hash table\n")); + return(LGMNAL_STATUS_NOMEM); + } +#else + nal_data->srxd_hash = NULL; +#endif + + LGMNAL_RXD_TOKEN_INIT(nal_data, nsrx); + LGMNAL_RXD_LOCK_INIT(nal_data); + + for (i=0; i<=nsrx; i++) { + PORTAL_ALLOC(rxd, sizeof(lgmnal_srxd_t)); + if (!rxd) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxd [%d]\n", i)); + return(LGMNAL_STATUS_NOMEM); + } +#if 0 + PORTAL_ALLOC(rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + if (!rxbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); + return(LGMNAL_STATUS_FAIL); + } + LGMNAL_PRINT(LGMNAL_DEBUG_V, ("Calling gm_register_memory with port [%p] rxbuffer [%p], size [%d]\n", + nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data))); + LGMNAL_GM_LOCK(nal_data); + gm_status = gm_register_memory(nal_data->gm_port, rxbuffer, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (gm_status != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("gm_register_memory failed buffer [%p], index [%d]\n", rxbuffer, i)); + switch(gm_status) { + case(GM_FAILURE): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_FAILURE\n")); + break; + case(GM_PERMISSION_DENIED): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_PERMISSION_DENIED\n")); + break; + case(GM_INVALID_PARAMETER): + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("GM_INVALID_PARAMETER\n")); + break; + default: + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Unknown GM error[%d]\n", gm_status)); + break; + + } + return(LGMNAL_STATUS_FAIL); + } +#else + LGMNAL_GM_LOCK(nal_data); + rxbuffer = gm_dma_malloc(nal_data->gm_port, LGMNAL_SMALL_MSG_SIZE(nal_data)); + LGMNAL_GM_UNLOCK(nal_data); + if (!rxbuffer) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("Failed to gm_dma_malloc rxbuffer [%d], size [%d]\n", i, LGMNAL_SMALL_MSG_SIZE(nal_data))); + PORTAL_FREE(rxd, sizeof(lgmnal_srxd_t)); + return(LGMNAL_STATUS_FAIL); + } +#endif + + rxd->buffer = rxbuffer; + rxd->size = LGMNAL_SMALL_MSG_SIZE(nal_data); + rxd->gmsize = gm_min_size_for_length(rxd->size); + + if (lgmnal_hash_add(&nal_data->srxd_hash, (void*)rxbuffer, (void*)rxd) != GM_SUCCESS) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("failed to create hash entry rxd[%p] for rxbuffer[%p]\n", rxd, rxbuffer)); + return(LGMNAL_STATUS_FAIL); + } + + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Registered rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); + } + + return(LGMNAL_STATUS_OK); +} + + + +/* Free the list of wired and gm_registered small rx buffers and the rx descriptors + * that go along with them. + * This function is only called when the API mutex is held (init or shutdown), + * so there is no need to hold the rxd spinlock. 
+ */ +void +lgmnal_free_srxd(lgmnal_data_t *nal_data) +{ + lgmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL; + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_free_small rx\n")); + + while(rxd) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Freeing rxd [%p] with buffer [%p], size [%d]\n", rxd, rxd->buffer, rxd->size)); + _rxd = rxd; + rxd = rxd->next; + +#if 0 + LGMNAL_GM_LOCK(nal_data); + gm_deregister_memory(nal_data->gm_port, _rxd->buffer, _rxd->size); + LGMNAL_GM_UNLOCK(nal_data); + PORTAL_FREE(_rxd->buffer, LGMNAL_SMALL_RXBUFFER_SIZE); +#else + LGMNAL_GM_LOCK(nal_data); + gm_dma_free(nal_data->gm_port, _rxd->buffer); + LGMNAL_GM_UNLOCK(nal_data); +#endif + PORTAL_FREE(_rxd, sizeof(lgmnal_srxd_t)); + } + return; +} + + +/* + * Get a rxd from the free list + * This get us a wired and gm_registered small rx buffer. + * This implicitly gets us a receive token also. + */ +lgmnal_srxd_t * +lgmnal_get_srxd(lgmnal_data_t *nal_data, int block) +{ + + lgmnal_srxd_t *rxd = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_get_srxd nal_data [%p] block [%d]\n", nal_data, block)); + + if (block) { + LGMNAL_RXD_GETTOKEN(nal_data); + } else { + if (LGMNAL_RXD_TRYGETTOKEN(nal_data)) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_get_srxd Can't get token\n")); + return(NULL); + } + } + LGMNAL_RXD_LOCK(nal_data); + rxd = nal_data->srxd; + if (rxd) + nal_data->srxd = rxd->next; + LGMNAL_RXD_UNLOCK(nal_data); + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_get_srxd got [%p], head is [%p]\n", rxd, nal_data->srxd)); + return(rxd); +} + +/* + * Return an rxd to the list + */ +void +lgmnal_return_srxd(lgmnal_data_t *nal_data, lgmnal_srxd_t *rxd) +{ + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_return_srxd nal_data [%p], rxd[%p]\n", nal_data, rxd)); + + LGMNAL_RXD_LOCK(nal_data); + rxd->next = nal_data->srxd; + nal_data->srxd = rxd; + LGMNAL_RXD_UNLOCK(nal_data); + LGMNAL_RXD_RETURNTOKEN(nal_data); + return; +} + +/* + * Given a pointer to a srxd find + * the relevant descriptor for it + * This is done by searching a hash + * list that is created when the srxd's + * are created + */ +lgmnal_srxd_t * +lgmnal_rxbuffer_to_srxd(lgmnal_data_t *nal_data, void *rxbuffer) +{ + lgmnal_srxd_t *srxd = NULL; + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_rxbuffer_to_srxd nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer)); +#ifdef LGMNAL_USE_GM_HASH + srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer); +#else + srxd = lgmnal_hash_find(nal_data->srxd_hash, rxbuffer); +#endif + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("srxd is [%p]\n", srxd)); + return(srxd); +} + + +void +lgmnal_stop_rxthread(lgmnal_data_t *nal_data) +{ + int delay = 15; + + + + LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("Attempting to stop rxthread nal_data [%p]\n", nal_data)); + + if (nal_data->rxthread_flag != LGMNAL_THREAD_CONTINUE) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("thread flag not correctly set\n")); + } + + nal_data->rxthread_flag = LGMNAL_THREAD_STOP; + LGMNAL_GM_LOCK(nal_data); + gm_set_alarm(nal_data->gm_port, &nal_data->rxthread_alarm, 10, NULL, NULL); + LGMNAL_GM_UNLOCK(nal_data); + + while(nal_data->rxthread_flag == LGMNAL_THREAD_STOP && delay--) { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_stop_rxthread sleeping\n")); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1024); + } + + if (nal_data->rxthread_flag == LGMNAL_THREAD_STOP) { + LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("I DON'T KNOW HOW TO WAKE THE THREAD\n")); + } else { + LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("RX THREAD SEEMS TO HAVE STOPPED\n")); + } + +} + + + +char * +lgmnal_gm_error(gm_status_t status) +{ + switch(status) { + 
case(GM_SUCCESS): + return("SUCCESS"); + case(GM_FAILURE): + return("FAILURE"); + case(GM_INPUT_BUFFER_TOO_SMALL): + return("INPUT_BUFFER_TOO_SMALL"); + case(GM_OUTPUT_BUFFER_TOO_SMALL): + return("OUTPUT_BUFFER_TOO_SMALL"); + case(GM_TRY_AGAIN ): + return("TRY_AGAIN"); + case(GM_BUSY): + return("BUSY"); + case(GM_MEMORY_FAULT): + return("MEMORY_FAULT"); + case(GM_INTERRUPTED): + return("INTERRUPTED"); + case(GM_INVALID_PARAMETER): + return("INVALID_PARAMETER"); + case(GM_OUT_OF_MEMORY): + return("OUT_OF_MEMORY"); + case(GM_INVALID_COMMAND): + return("INVALID_COMMAND"); + case(GM_PERMISSION_DENIED): + return("PERMISSION_DENIED"); + case(GM_INTERNAL_ERROR): + return("INTERNAL_ERROR"); + case(GM_UNATTACHED): + return("UNATTACHED"); + case(GM_UNSUPPORTED_DEVICE): + return("UNSUPPORTED_DEVICE"); + case(GM_SEND_TIMED_OUT): + return("GM_SEND_TIMEDOUT"); + case(GM_SEND_REJECTED): + return("GM_SEND_REJECTED"); + case(GM_SEND_TARGET_PORT_CLOSED): + return("GM_SEND_TARGET_PORT_CLOSED"); + case(GM_SEND_TARGET_NODE_UNREACHABLE): + return("GM_SEND_TARGET_NODE_UNREACHABLE"); + case(GM_SEND_DROPPED): + return("GM_SEND_DROPPED"); + case(GM_SEND_PORT_CLOSED): + return("GM_SEND_PORT_CLOSED"); + case(GM_NODE_ID_NOT_YET_SET): + return("GM_NODE_ID_NOT_YET_SET"); + case(GM_STILL_SHUTTING_DOWN): + return("GM_STILL_SHUTTING_DOWN"); + case(GM_CLONE_BUSY): + return("GM_CLONE_BUSY"); + case(GM_NO_SUCH_DEVICE): + return("GM_NO_SUCH_DEVICE"); + case(GM_ABORTED): + return("GM_ABORTED"); + case(GM_INCOMPATIBLE_LIB_AND_DRIVER): + return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); + case(GM_UNTRANSLATED_SYSTEM_ERROR): + return("GM_UNTRANSLATED_SYSTEM_ERROR"); + case(GM_ACCESS_DENIED): + return("GM_ACCESS_DENIED"); + + +/* + * These ones are in the docs but aren't in the header file + case(GM_DEV_NOT_FOUND): + return("GM_DEV_NOT_FOUND"); + case(GM_INVALID_PORT_NUMBER): + return("GM_INVALID_PORT_NUMBER"); + case(GM_UC_ERROR): + return("GM_US_ERROR"); + case(GM_PAGE_TABLE_FULL): + return("GM_PAGE_TABLE_FULL"); + case(GM_MINOR_OVERFLOW): + return("GM_MINOR_OVERFLOW"); + case(GM_SEND_ORPHANED): + return("GM_SEND_ORPHANED"); + case(GM_HARDWARE_FAULT): + return("GM_HARDWARE_FAULT"); + case(GM_DATA_CORRUPTED): + return("GM_DATA_CORRUPTED"); + case(GM_TIMED_OUT): + return("GM_TIMED_OUT"); + case(GM_USER_ERROR): + return("GM_USER_ERROR"); + case(GM_NO_MATCH): + return("GM_NOMATCH"); + case(GM_NOT_SUPPORTED_IN_KERNEL): + return("GM_NOT_SUPPORTED_IN_KERNEL"); + case(GM_NOT_SUPPORTED_ON_ARCH): + return("GM_NOT_SUPPORTED_ON_ARCH"); + case(GM_PTE_REF_CNT_OVERFLOW): + return("GM_PTR_REF_CNT_OVERFLOW"); + case(GM_NO_DRIVER_SUPPORT): + return("GM_NO_DRIVER_SUPPORT"); + case(GM_FIRMWARE_NOT_RUNNING): + return("GM_FIRMWARE_NOT_RUNNING"); + + * These ones are in the docs but aren't in the header file + */ + default: + return("UNKNOWN GM ERROR CODE"); + } +} + + +char * +lgmnal_rxevent(gm_recv_event_t *ev) +{ + short event; + char msg[24]; + event = GM_RECV_EVENT_TYPE(ev); + switch(event) { + case(GM_NO_RECV_EVENT): + return("GM_NO_RECV_EVENT"); + case(GM_SENDS_FAILED_EVENT): + return("GM_SEND_FAILED_EVENT"); + case(GM_ALARM_EVENT): + return("GM_ALARM_EVENT"); + case(GM_SENT_EVENT): + return("GM_SENT_EVENT"); + case(_GM_SLEEP_EVENT): + return("_GM_SLEEP_EVENT"); + case(GM_RAW_RECV_EVENT): + return("GM_RAW_RECV_EVENT"); + case(GM_BAD_SEND_DETECTED_EVENT): + return("GM_BAD_SEND_DETECTED_EVENT"); + case(GM_SEND_TOKEN_VIOLATION_EVENT): + return("GM_SEND_TOKEN_VIOLATION_EVENT"); + case(GM_RECV_TOKEN_VIOLATION_EVENT): + 
return("GM_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_BAD_RECV_TOKEN_EVENT): + return("GM_BAD_RECV_TOKEN_EVENT"); + case(GM_ALARM_VIOLATION_EVENT): + return("GM_ALARM_VIOLATION_EVENT"); + case(GM_RECV_EVENT): + return("GM_RECV_EVENT"); + case(GM_HIGH_RECV_EVENT): + return("GM_HIGH_RECV_EVENT"); + case(GM_PEER_RECV_EVENT): + return("GM_PEER_RECV_EVENT"); + case(GM_HIGH_PEER_RECV_EVENT): + return("GM_HIGH_PEER_RECV_EVENT"); + case(GM_FAST_RECV_EVENT): + return("GM_FAST_RECV_EVENT"); + case(GM_FAST_HIGH_RECV_EVENT): + return("GM_FAST_HIGH_RECV_EVENT"); + case(GM_FAST_PEER_RECV_EVENT): + return("GM_FAST_PEER_RECV_EVENT"); + case(GM_FAST_HIGH_PEER_RECV_EVENT): + return("GM_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_REJECTED_SEND_EVENT): + return("GM_REJECTED_SEND_EVENT"); + case(GM_ORPHANED_SEND_EVENT): + return("GM_ORPHANED_SEND_EVENT"); + case(GM_BAD_RESEND_DETECTED_EVENT): + return("GM_BAD_RESEND_DETETED_EVENT"); + case(GM_DROPPED_SEND_EVENT): + return("GM_DROPPED_SEND_EVENT"); + case(GM_BAD_SEND_VMA_EVENT): + return("GM_BAD_SEND_VMA_EVENT"); + case(GM_BAD_RECV_VMA_EVENT): + return("GM_BAD_RECV_VMA_EVENT"); + case(_GM_FLUSHED_ALARM_EVENT): + return("GM_FLUSHED_ALARM_EVENT"); + case(GM_SENT_TOKENS_EVENT): + return("GM_SENT_TOKENS_EVENTS"); + case(GM_IGNORE_RECV_EVENT): + return("GM_IGNORE_RECV_EVENT"); + case(GM_ETHERNET_RECV_EVENT): + return("GM_ETHERNET_RECV_EVENT"); + case(GM_NEW_NO_RECV_EVENT): + return("GM_NEW_NO_RECV_EVENT"); + case(GM_NEW_SENDS_FAILED_EVENT): + return("GM_NEW_SENDS_FAILED_EVENT"); + case(GM_NEW_ALARM_EVENT): + return("GM_NEW_ALARM_EVENT"); + case(GM_NEW_SENT_EVENT): + return("GM_NEW_SENT_EVENT"); + case(_GM_NEW_SLEEP_EVENT): + return("GM_NEW_SLEEP_EVENT"); + case(GM_NEW_RAW_RECV_EVENT): + return("GM_NEW_RAW_RECV_EVENT"); + case(GM_NEW_BAD_SEND_DETECTED_EVENT): + return("GM_NEW_BAD_SEND_DETECTED_EVENT"); + case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): + return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): + return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); + case(GM_NEW_BAD_RECV_TOKEN_EVENT): + return("GM_NEW_BAD_RECV_TOKEN_EVENT"); + case(GM_NEW_ALARM_VIOLATION_EVENT): + return("GM_NEW_ALARM_VIOLATION_EVENT"); + case(GM_NEW_RECV_EVENT): + return("GM_NEW_RECV_EVENT"); + case(GM_NEW_HIGH_RECV_EVENT): + return("GM_NEW_HIGH_RECV_EVENT"); + case(GM_NEW_PEER_RECV_EVENT): + return("GM_NEW_PEER_RECV_EVENT"); + case(GM_NEW_HIGH_PEER_RECV_EVENT): + return("GM_NEW_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_FAST_RECV_EVENT): + return("GM_NEW_FAST_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_RECV_EVENT): + return("GM_NEW_FAST_HIGH_RECV_EVENT"); + case(GM_NEW_FAST_PEER_RECV_EVENT): + return("GM_NEW_FAST_PEER_RECV_EVENT"); + case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): + return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); + case(GM_NEW_REJECTED_SEND_EVENT): + return("GM_NEW_REJECTED_SEND_EVENT"); + case(GM_NEW_ORPHANED_SEND_EVENT): + return("GM_NEW_ORPHANED_SEND_EVENT"); + case(_GM_NEW_PUT_NOTIFICATION_EVENT): + return("_GM_NEW_PUT_NOTIFICATION_EVENT"); + case(GM_NEW_FREE_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_SEND_TOKEN_EVENT"); + case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): + return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); + case(GM_NEW_BAD_RESEND_DETECTED_EVENT): + return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); + case(GM_NEW_DROPPED_SEND_EVENT): + return("GM_NEW_DROPPED_SEND_EVENT"); + case(GM_NEW_BAD_SEND_VMA_EVENT): + return("GM_NEW_BAD_SEND_VMA_EVENT"); + case(GM_NEW_BAD_RECV_VMA_EVENT): + return("GM_NEW_BAD_RECV_VMA_EVENT"); + case(_GM_NEW_FLUSHED_ALARM_EVENT): + 
return("GM_NEW_FLUSHED_ALARM_EVENT");
+	case(GM_NEW_SENT_TOKENS_EVENT):
+		return("GM_NEW_SENT_TOKENS_EVENT");
+	case(GM_NEW_IGNORE_RECV_EVENT):
+		return("GM_NEW_IGNORE_RECV_EVENT");
+	case(GM_NEW_ETHERNET_RECV_EVENT):
+		return("GM_NEW_ETHERNET_RECV_EVENT");
+	default:
+		{
+		/* use static storage so the string remains valid after return */
+		static char unknown[32];
+		snprintf(unknown, sizeof(unknown), "Unknown Recv event [%d]", event);
+		return(unknown);
+		}
+#if 0
+	case(/* _GM_PUT_NOTIFICATION_EVENT */
+	case(/* GM_FREE_SEND_TOKEN_EVENT */
+	case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */
+#endif
+	}
+}
+
+
+void
+lgmnal_yield(int delay)
+{
+	set_current_state(TASK_INTERRUPTIBLE);
+	schedule_timeout(delay);
+}
+
+int
+lgmnal_is_small_message(lgmnal_data_t *nal_data, int niov, struct iovec *iov, int len)
+{
+
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_is_small_message len is [%d]\n", len));
+	if (len < LGMNAL_SMALL_MSG_SIZE(nal_data)) {
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("Yes, this is a small message\n"));
+		return(1);
+	} else {
+		LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("No, this is not a small message\n"));
+		return(0);
+	}
+}
+
+void *
+lgmnal_hash_find(lgmnal_hash_t *hash, void *key)
+{
+	void *data = NULL;
+	int count = 0;
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_find hash [%p] key [%p]\n", hash, key));
+
+	while (hash) {
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find stepping [%d]\n", count++));
+		if (hash->key == key) {
+			data = hash->data;
+			LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find got data[%p]\n", data));
+			return(data);
+		} else
+			hash = hash->next;
+	}
+	LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_find data not found\n"));
+	return(NULL);
+}
+
+/*
+ * TO DO hash. figure out why we are getting bad results from gm_hash and then use it.
+ */
+
+int
+lgmnal_hash_add(lgmnal_hash_t **hash, void *key, void *data)
+{
+
+#ifdef LGMNAL_USE_GM_HASH
+	return(gm_hash_insert(*hash, (void*)key, (void*)data));
+#else
+	lgmnal_hash_t	*new = NULL;
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_add hash [%p]\n", *hash));
+	PORTAL_ALLOC(new, sizeof(lgmnal_hash_t));
+	if (!new) {
+		LGMNAL_PRINT(LGMNAL_DEBUG_ERR, ("lgmnal_hash_add :: can't get memory\n"));
+		return(-1);
+	}
+	memset(new, 0, sizeof(lgmnal_hash_t));
+	new->data = data;
+	new->key = key;
+	new->next = *hash;
+	*hash = new;
+	LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_add hash head [%p]\n", *hash));
+	return(0);
+#endif
+}
+
+void
+lgmnal_hash_free(lgmnal_hash_t **hash)
+{
+
+	lgmnal_hash_t	*_hash = NULL;
+	LGMNAL_PRINT(LGMNAL_DEBUG_TRACE, ("lgmnal_hash_free hash [%p]\n", *hash));
+
+	while (*hash) {
+		_hash = *hash;
+		LGMNAL_PRINT(LGMNAL_DEBUG_VV, ("lgmnal_hash_free freeing hash [%p]\n", _hash));
+		*hash = _hash->next;
+		PORTAL_FREE(_hash, sizeof(lgmnal_hash_t));
+	}
+	return;
+}
+
+
+EXPORT_SYMBOL(lgmnal_yield);
+EXPORT_SYMBOL(lgmnal_print);
+EXPORT_SYMBOL(lgmnal_alloc_srxd);
+EXPORT_SYMBOL(lgmnal_get_srxd);
+EXPORT_SYMBOL(lgmnal_return_srxd);
+EXPORT_SYMBOL(lgmnal_free_srxd);
+EXPORT_SYMBOL(lgmnal_alloc_stxd);
+EXPORT_SYMBOL(lgmnal_get_stxd);
+EXPORT_SYMBOL(lgmnal_return_stxd);
+EXPORT_SYMBOL(lgmnal_free_stxd);
+EXPORT_SYMBOL(lgmnal_rxbuffer_to_srxd);
+EXPORT_SYMBOL(lgmnal_rxevent);
+EXPORT_SYMBOL(lgmnal_gm_error);
+EXPORT_SYMBOL(lgmnal_stop_rxthread);
-- 
1.8.3.1
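
Note on the small-message framing (an illustrative aside, not part of the patch): lgmnal_small_transmit() packs an lgmnal_msghdr_t, then the Portals ptl_hdr_t, then the payload into one pre-registered DMA buffer, and lgmnal_small_receive1()/lgmnal_small_receive2() skip those same two headers to find the payload on the receive side. The sketch below restates that layout in plain user-space C; the names (sketch_msghdr_t, sketch_pack, sketch_payload) are invented stand-ins and the ptl_hdr_t size is illustrative only.

/* Sketch of the on-wire layout assumed by the small-message path:
 * [lgmnal_msghdr_t][ptl_hdr_t][payload]. Not part of the patch. */
#include <stddef.h>
#include <string.h>

typedef struct {
	int		magic;			/* LGMNAL_MAGIC */
	int		type;			/* LGMNAL_SMALL_MESSAGE, ... */
	unsigned int	sender_node_id;
} sketch_msghdr_t;

typedef struct { char bytes[96]; } sketch_ptl_hdr_t;	/* stand-in for ptl_hdr_t */

/* Pack the two headers then the payload, as lgmnal_small_transmit() does.
 * The return value corresponds to tot_size handed to gm_send_to_peer_with_callback(). */
size_t sketch_pack(void *txbuf, const sketch_msghdr_t *mh,
		   const sketch_ptl_hdr_t *ph, const void *payload, size_t paylen)
{
	char *p = txbuf;

	memcpy(p, mh, sizeof(*mh));	p += sizeof(*mh);
	memcpy(p, ph, sizeof(*ph));	p += sizeof(*ph);
	memcpy(p, payload, paylen);
	return sizeof(*mh) + sizeof(*ph) + paylen;
}

/* The receiver skips the two headers to reach the payload, which is the
 * pointer arithmetic done in lgmnal_small_receive2(). */
const void *sketch_payload(const void *rxbuf)
{
	return (const char *)rxbuf + sizeof(sketch_msghdr_t) + sizeof(sketch_ptl_hdr_t);
}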
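
Note on the descriptor pools (also an illustrative aside, not part of the patch): lgmnal_alloc_stxd()/lgmnal_alloc_srxd() build singly linked free lists of pre-registered DMA buffers, and lgmnal_get_stxd()/lgmnal_return_stxd() pair a counting token semaphore (LGMNAL_TXD_GETTOKEN / LGMNAL_TXD_RETURNTOKEN) with a spinlock around the list, so a blocking get waits for a token and then unlinks the head. The following user-space analogue uses POSIX semaphores and a mutex instead of the kernel primitives; every name in it (txd_pool_t, pool_get, pool_put) is invented for the sketch.

/* User-space analogue of the stxd free-list/token scheme. Not part of the patch. */
#include <pthread.h>
#include <semaphore.h>
#include <stdlib.h>

typedef struct txd { struct txd *next; /* buffer, size, ... */ } txd_t;

typedef struct {
	sem_t		tokens;	/* counts free descriptors (send tokens) */
	pthread_mutex_t	lock;	/* protects the singly linked free list  */
	txd_t		*free;	/* head of the free list                 */
} txd_pool_t;

int pool_init(txd_pool_t *p, int ntx)
{
	int i;

	p->free = NULL;
	pthread_mutex_init(&p->lock, NULL);
	sem_init(&p->tokens, 0, ntx);		/* one count per descriptor */
	for (i = 0; i < ntx; i++) {
		txd_t *t = calloc(1, sizeof(*t));
		if (!t)
			return -1;
		t->next = p->free;
		p->free = t;
	}
	return 0;
}

/* Blocking get: wait for a token, then unlink the list head under the lock. */
txd_t *pool_get(txd_pool_t *p)
{
	txd_t *t;

	sem_wait(&p->tokens);
	pthread_mutex_lock(&p->lock);
	t = p->free;
	p->free = t->next;
	pthread_mutex_unlock(&p->lock);
	return t;
}

/* Return: push the descriptor back, then release the token to wake a waiter. */
void pool_put(txd_pool_t *p, txd_t *t)
{
	pthread_mutex_lock(&p->lock);
	t->next = p->free;
	p->free = t;
	pthread_mutex_unlock(&p->lock);
	sem_post(&p->tokens);
}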