+++ /dev/null
--Kernelenv
--Makefile
--autoMakefile
--autoMakefile.in
--aclocal.m4
--autom4te.cache
--config.log
--config.status
--configure
--.*.cmd
--.depend
+++ /dev/null
--EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
--# portals/utils/debug.c wants <linux/version.h> from userspace. sigh.
--HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
--LIBREADLINE := @LIBREADLINE@
--# 2.5's makefiles aren't nice to cross dir libraries in host programs
--PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
+++ /dev/null
--EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
--HOSTCFLAGS := $(EXTRA_CFLAGS)
--# the kernel doesn't want us to build archives for host binaries :/
--PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
+++ /dev/null
--subdir-m += libcfs
--
--cray-subdirs += portals
--cray-subdirs += knals
--cray-subdirs += router
--cray-subdirs += tests
--@CRAY_PORTALS_FALSE@subdir-m += $(cray-subdirs)
--
--@INCLUDE_RULES@
+++ /dev/null
--include $(src)/Kernelenv
--
--# The ordering of these determines the order that each subsystem's
--# module_init() functions are called in. if these are changed make sure
--# they reflect the dependencies between each subsystem's _init functions.
--obj-y += libcfs/
--obj-y += portals/
--obj-y += router/
--obj-y += knals/
--obj-y += tests/
--
--obj-m += utils/
+++ /dev/null
--# -------- we can't build modules unless srcdir = builddir
--# The check looks for autoMakefile.am in the current directory and aborts
--# when it is missing (presumably because configure was started from a
--# separate build directory).
--if test x$enable_modules != xno ; then
-- AC_CHECK_FILE([autoMakefile.am],[],
-- [AC_MSG_ERROR([At this time, Lustre does not support building kernel modules with srcdir != builddir.])])
--fi
--
--# -------- in kernel compilation? (2.5 only) -------------
--AC_MSG_CHECKING([if inkernel build support is requested])
--AC_ARG_ENABLE([inkernel],
-- AC_HELP_STRING([--enable-inkernel],
-- [set up 2.5 kernel makefiles]),
-- [],[enable_inkernel=no])
--AC_MSG_RESULT([$enable_inkernel])
--AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
--
--# -------- are we building against an external portals? -------
--AC_MSG_CHECKING([if Cray portals should be used])
--AC_ARG_WITH([cray-portals],
-- AC_HELP_STRING([--with-cray-portals=path],
-- [path to cray portals]),
-- [
- CRAY_PORTALS_INCLUDE="-I$with_cray_portals"
- AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals])
- if test "$with_cray_portals" != no; then
- if test -r $with_cray_portals/include/portals/api.h ; then
- CRAY_PORTALS_PATH=$with_cray_portals
- CRAY_PORTALS_INCLUDE="-I$with_cray_portals/include"
- AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals])
- else
- AC_MSG_ERROR([--with-cray-portals specified badly])
- fi
- fi
-- ],[with_cray_portals=no])
-AC_SUBST(CRAY_PORTALS_PATH)
--AC_MSG_RESULT([$with_cray_portals])
-
--AM_CONDITIONAL(CRAY_PORTALS, test x$with_cray_portals != xno)
-
-# -------- enable tests and utils? -------
--if test x$enable_tests = xno ; then
-- AC_MSG_NOTICE([disabling tests])
-- enable_tests=no
--fi
--if test x$enable_utils = xno ; then
-- AC_MSG_NOTICE([disabling utilities])
-- enable_utils=no
--fi
--
--if test x$enable_modules != xno ; then
-- # -------- set linuxdir ------------
-- AC_MSG_CHECKING([for Linux sources])
-- AC_ARG_WITH([linux],
-- AC_HELP_STRING([--with-linux=path],
-- [set path to Linux source (default=/usr/src/linux)]),
-- [LINUX=$with_linux],
-- [LINUX=/usr/src/linux])
-- AC_MSG_RESULT([$LINUX])
-- AC_SUBST(LINUX)
-- if test x$enable_inkernel = xyes ; then
-- # Point $LINUX/fs/lustre at this tree. -f keeps rm quiet when no
-- # link from an earlier configure run exists yet.
-- echo ln -s `pwd` $LINUX/fs/lustre
-- rm -f $LINUX/fs/lustre
-- ln -s `pwd` $LINUX/fs/lustre
-- fi
--
-- # -------- check for .config --------
-- # LINUX_CONFIG is the kernel configuration file handed to the kernel
-- # test builds; it defaults to the .config inside $LINUX.
-- AC_ARG_WITH([linux-config],
-- [AC_HELP_STRING([--with-linux-config=path],
-- [set path to Linux .config (default=\$LINUX/.config)])],
-- [LINUX_CONFIG=$with_linux_config],
-- [LINUX_CONFIG=$LINUX/.config])
-- AC_SUBST(LINUX_CONFIG)
--
-- # Prefer /boot/kernel.h. Fall back to /var/adm/running-kernel.h only
-- # when that file really exists; otherwise KERNEL_SOURCE_HEADER is left
-- # for --with-kernel-source-header to set.
-- AC_CHECK_FILE([/boot/kernel.h],
-- [KERNEL_SOURCE_HEADER='/boot/kernel.h'],
-- [AC_CHECK_FILE([/var/adm/running-kernel.h],
-- [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h'])])
--
-- AC_ARG_WITH([kernel-source-header],
-- AC_HELP_STRING([--with-kernel-source-header=path],
-- [Use a different kernel version header. Consult README.kernel-source for details.]),
-- [KERNEL_SOURCE_HEADER=$with_kernel_source_header])
--
-- # --------------------
-- ARCH_UM=
-- UML_CFLAGS=
--
-- AC_MSG_CHECKING([if you are running user mode linux for $host_cpu])
-- if test -e $LINUX/include/asm-um ; then
-- if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
-- ARCH_UM='ARCH=um'
-- # see notes in Rules.in
-- UML_CFLAGS='-O0'
-- AC_MSG_RESULT(yes)
-- else
-- AC_MSG_RESULT([no (asm doesn't point at asm-um)])
-- fi
-- else
-- AC_MSG_RESULT([no (asm-um missing)])
-- fi
--
-- AC_SUBST(ARCH_UM)
-- AC_SUBST(UML_CFLAGS)
--
-- # --------- Linux 25 ------------------
-- AC_CHECK_FILE([$LINUX/include/linux/namei.h],
-- [
-- linux25="yes"
-- KMODEXT=".ko"
-- enable_ldiskfs="yes"
-- BACKINGFS="ldiskfs"
-- ],[
-- KMODEXT=".o"
-- linux25="no"
-- ])
-- AC_MSG_CHECKING([if you are using Linux 2.6])
-- AC_MSG_RESULT([$linux25])
--
-- AC_SUBST(LINUX25)
-- AC_SUBST(KMODEXT)
--
-- AC_PATH_PROG(PATCH, patch, [no])
-- AC_PATH_PROG(QUILT, quilt, [no])
--
-- if test x$enable_ldiskfs$PATCH$QUILT = xyesnono ; then
-- AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)])
-- fi
--fi
--AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
--AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
--
--# ------- Makeflags ------------------
--
--CPPFLAGS="$CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include"
--
--# liblustre are all the same
--LLCPPFLAGS="-D__arch_lib__ -D_LARGEFILE64_SOURCE=1"
--AC_SUBST(LLCPPFLAGS)
--
--LLCFLAGS="-g -Wall -fPIC"
--AC_SUBST(LLCFLAGS)
--
--# everyone builds against portals and lustre
--
--if test x$enable_ldiskfs = xyes ; then
-- AC_DEFINE(CONFIG_LDISKFS_FS_MODULE, 1, [build ldiskfs as a module])
-- AC_DEFINE(CONFIG_LDISKFS_FS_XATTR, 1, [enable extended attributes for ldiskfs])
-- AC_DEFINE(CONFIG_LDISKFS_FS_POSIX_ACL, 1, [enable posix acls])
-- AC_DEFINE(CONFIG_LDISKFS_FS_SECURITY, 1, [enable fs security])
--fi
--
--EXTRA_KCFLAGS="-g $CRAY_PORTALS_INCLUDE $CRAY_PORTALS_COMMANDLINE -I$PWD/portals/include -I$PWD/include"
--
--# these are like AC_TRY_COMPILE, but try to build modules against the
--# kernel, inside the kernel-tests directory
--
--# LUSTRE_MODULE_CONFTEST(source)
--# Writes the given source text to conftest.c in the current directory
--# through a shell here-document.
--AC_DEFUN([LUSTRE_MODULE_CONFTEST],
--[cat >conftest.c <<_ACEOF
--$1
--_ACEOF
--])
--
--# LUSTRE_MODULE_COMPILE_IFELSE(source, make-target, test-command,
--#                              action-if-true, action-if-false)
--# When source is non-empty it is first written to conftest.c. Stale build
--# products in kernel-tests are removed, conftest.c is copied into
--# kernel-tests, and make is run with -C $LINUX using
--# kernel-tests/Makefile and the given make target (make output goes to
--# /dev/null). If make succeeds, test-command is run as well.
--# action-if-true runs when both succeed; otherwise the test program is
--# logged and action-if-false runs. Build products are removed again at
--# the end; the two conftest.c files are only removed when source was
--# given.
--AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE],
--[m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl
--rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko
--AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM SUBDIRS=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])],
-- [$4],
-- [_AC_MSG_LOG_CONFTEST
--m4_ifvaln([$5],[$5])dnl])dnl
--rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.mod.o kernel-tests/conftest.ko m4_ifval([$1], [kernel-tests/conftest.c conftest.c])[]dnl
--])
--
--# LUSTRE_MODULE_TRY_COMPILE(includes, function-body,
--#                           action-if-true, action-if-false)
--# Wraps includes and function-body into a test program and builds it with
--# the "modules" make target via LUSTRE_MODULE_COMPILE_IFELSE. The build
--# counts as successful when kernel-tests/conftest.o exists and is
--# non-empty.
--AC_DEFUN([LUSTRE_MODULE_TRY_COMPILE],
--[LUSTRE_MODULE_COMPILE_IFELSE(
-- [AC_LANG_PROGRAM([[$1]], [[$2]])],
-- [modules],
-- [test -s kernel-tests/conftest.o],
-- [$3], [$4])])
--
--# LUSTRE_MODULE_TRY_MAKE(includes, function-body, make-target,
--#                        test-command, action-if-true, action-if-false)
--# Like LUSTRE_MODULE_TRY_COMPILE, but the caller chooses the make target
--# and the command that decides whether the run succeeded.
--AC_DEFUN([LUSTRE_MODULE_TRY_MAKE],
--[LUSTRE_MODULE_COMPILE_IFELSE([AC_LANG_PROGRAM([[$1]], [[$2]])], [$3], [$4], [$5], [$6])])
--
--# ------------ include paths ------------------
--
--if test x$enable_modules != xno ; then
-- # ------------ .config exists ----------------
-- AC_CHECK_FILE([$LINUX_CONFIG],[],
-- [AC_MSG_ERROR([Kernel config could not be found. If you are building from a kernel-source rpm consult README.kernel-source])])
--
-- # ----------- make dep run? ------------------
-- AC_CHECK_FILES([$LINUX/include/linux/autoconf.h
-- $LINUX/include/linux/version.h
-- $LINUX/include/linux/config.h],[],
-- [AC_MSG_ERROR([Run make config in $LINUX.])])
--
-- # ------------ rhconfig.h includes runtime-generated bits --
-- # red hat kernel-source checks
--
-- # we know this exists after the check above. if the user
-- # tarred up the tree and ran make dep etc. in it, then
-- # version.h gets overwritten with a standard linux one.
--
-- if grep rhconfig $LINUX/include/linux/version.h >/dev/null ; then
-- # This is a clean kernel-source tree, we need to
-- # enable extensive workarounds to get this to build
-- # modules
-- AC_CHECK_FILE([$KERNEL_SOURCE_HEADER],
-- [if test $KERNEL_SOURCE_HEADER = '/boot/kernel.h' ; then
-- AC_MSG_WARN([Using /boot/kernel.h from RUNNING kernel.])
-- AC_MSG_WARN([If this is not what you want, use --with-kernel-source-header.])
-- AC_MSG_WARN([Consult README.kernel-source for details.])
-- fi],
-- [AC_MSG_ERROR([$KERNEL_SOURCE_HEADER not found. Consult README.kernel-source for details.])])
-- EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS"
-- fi
--
-- # --- check that we can build modules at all
-- AC_MSG_CHECKING([that modules can be built])
-- LUSTRE_MODULE_TRY_COMPILE([],[],
-- [
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no])
-- AC_MSG_WARN([Consult config.log for details.])
-- AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source])
-- AC_MSG_ERROR([Kernel modules could not be built.])
-- ])
--
-- # ------------ LINUXRELEASE and moduledir ------------------
-- MODULE_TARGET="SUBDIRS"
-- if test $linux25 = 'yes' ; then
-- # ------------ external module support ---------------------
-- makerule="$PWD/kernel-tests"
-- AC_MSG_CHECKING([for external module build support])
-- rm -f kernel-tests/conftest.i
-- LUSTRE_MODULE_TRY_MAKE([],[],
-- [$makerule LUSTRE_KERNEL_TEST=conftest.i],
-- [test -s kernel-tests/conftest.i],
-- [
-- AC_MSG_RESULT([no])
-- ],[
-- AC_MSG_RESULT([yes])
-- makerule="_module_$makerule"
-- MODULE_TARGET="M"
-- ])
-- else
-- makerule="_dir_$PWD/kernel-tests"
-- fi
-- AC_SUBST(MODULE_TARGET)
-- LINUXRELEASE=
-- rm -f kernel-tests/conftest.i
-- AC_MSG_CHECKING([for Linux release])
-- LUSTRE_MODULE_TRY_MAKE(
-- [#include <linux/version.h>],
-- [char *LINUXRELEASE;
-- LINUXRELEASE=UTS_RELEASE;],
-- [$makerule LUSTRE_KERNEL_TEST=conftest.i],
-- [test -s kernel-tests/conftest.i],
-- [
-- # LINUXRELEASE="UTS_RELEASE"
-- eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i)
-- ],[
-- AC_MSG_RESULT([unknown])
-- AC_MSG_ERROR([Could not preprocess test program. Consult config.log for details.])
-- ])
-- rm -f kernel-tests/conftest.i
-- if test x$LINUXRELEASE = x ; then
-- AC_MSG_RESULT([unknown])
-- AC_MSG_ERROR([Could not determine Linux release version from linux/version.h.])
-- fi
-- AC_MSG_RESULT([$LINUXRELEASE])
-- AC_SUBST(LINUXRELEASE)
--
-- moduledir='/lib/modules/'$LINUXRELEASE/kernel
-- modulefsdir='$(moduledir)/fs/$(PACKAGE)'
-- modulenetdir='$(moduledir)/net/$(PACKAGE)'
--
-- AC_SUBST(moduledir)
-- AC_SUBST(modulefsdir)
-- AC_SUBST(modulenetdir)
--
-- # ------------ RELEASE --------------------------------
-- AC_MSG_CHECKING([for Lustre release])
-- RELEASE="`echo ${LINUXRELEASE} | tr '-' '_'`_`date +%Y%m%d%H%M`"
-- AC_MSG_RESULT($RELEASE)
-- AC_SUBST(RELEASE)
--
-- # ---------- Portals flags --------------------
--
-- AC_MSG_CHECKING([for zero-copy TCP support])
-- AC_ARG_ENABLE([zerocopy],
-- AC_HELP_STRING([--disable-zerocopy],
-- [disable socknal zerocopy]),
-- [],[enable_zerocopy='yes'])
-- if test x$enable_zerocopy = xno ; then
-- AC_MSG_RESULT([no (by request)])
-- else
-- # Count the lines of skbuff.h that mention zccd. grep may print
-- # nothing at all when the header cannot be read, so an empty count
-- # is treated as zero instead of as a match.
-- ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h 2>/dev/null`"
-- if test "${ZCCD:-0}" != 0 ; then
-- AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP])
-- AC_MSG_RESULT(yes)
-- else
-- AC_MSG_RESULT([no (no kernel support)])
-- fi
-- fi
--
-- AC_ARG_ENABLE([affinity],
-- AC_HELP_STRING([--disable-affinity],
-- [disable process/irq affinity]),
-- [],[enable_affinity='yes'])
--
-- AC_MSG_CHECKING([for CPU affinity support])
-- if test x$enable_affinity = xno ; then
-- AC_MSG_RESULT([no (by request)])
-- else
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/sched.h>
-- ],[
-- struct task_struct t;
-- #ifdef CPU_ARRAY_SIZE
-- cpumask_t m;
-- #else
-- unsigned long m;
-- #endif
-- set_cpus_allowed(&t, m);
-- ],[
-- AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support])
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no (no kernel support)])
-- ])
-- fi
--
-- #####################################
--
-- AC_MSG_CHECKING([if quadrics kernel headers are present])
-- if test -d $LINUX/drivers/net/qsnet ; then
-- AC_MSG_RESULT([yes])
-- QSWNAL="qswnal"
-- AC_MSG_CHECKING([for multirail EKC])
-- if test -f $LINUX/include/elan/epcomms.h; then
-- AC_MSG_RESULT([supported])
-- QSWCPPFLAGS="-DMULTIRAIL_EKC=1"
-- else
-- AC_MSG_RESULT([not supported])
- QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include"
- if test -d $LINUX/drivers/net/qsnet/include; then
- QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include"
- else
- QSWCPPFLAGS="-I$LINUX/include/linux"
- fi
-- fi
-- else
-- AC_MSG_RESULT([no])
-- QSWNAL=""
-- QSWCPPFLAGS=""
-- fi
-- AC_SUBST(QSWCPPFLAGS)
-- AC_SUBST(QSWNAL)
--
-- AC_MSG_CHECKING([if gm support was requested])
-- AC_ARG_WITH([gm],
-- AC_HELP_STRING([--with-gm=path],
-- [build gmnal against path]),
-- [
-- case $with_gm in
-- yes)
-- AC_MSG_RESULT([yes])
-- GMCPPFLAGS="-I/usr/local/gm/include"
-- GMNAL="gmnal"
-- ;;
-- no)
-- AC_MSG_RESULT([no])
-- GMCPPFLAGS=""
-- GMNAL=""
-- ;;
-- *)
-- AC_MSG_RESULT([yes])
-- GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm"
-- GMNAL="gmnal"
-- ;;
-- esac
-- ],[
-- AC_MSG_RESULT([no])
-- GMCPPFLAGS=""
-- GMNAL=""
-- ])
-- AC_SUBST(GMCPPFLAGS)
-- AC_SUBST(GMNAL)
-
- #fixme: where are the default IB includes?
- default_ib_include_dir=/usr/local/ib/include
- an_ib_include_file=vapi.h
--
- AC_MSG_CHECKING([if ib nal support was requested])
- AC_ARG_WITH([ib],
- AC_HELP_STRING([--with-ib=yes/no/path],
- [Path to IB includes]),
- #### OpenIB
- AC_MSG_CHECKING([if OpenIB kernel headers are present])
- OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD"
- EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS"
- LUSTRE_MODULE_TRY_COMPILE(
-- [
- case $with_ib in
- yes)
- AC_MSG_RESULT([yes])
- IBCPPFLAGS="-I/usr/local/ib/include"
- IBNAL="ibnal"
- ;;
- no)
- AC_MSG_RESULT([no])
- IBCPPFLAGS=""
- IBNAL=""
- ;;
- *)
- AC_MSG_RESULT([yes])
- IBCPPFLAGS="-I$with_ib"
- IBNAL=""
- ;;
- esac
- #include <ts_ib_core.h>
- ],[
- struct ib_device_properties props;
- return 0;
- ],[
- AC_MSG_RESULT([yes])
- OPENIBNAL="openibnal"
-- ],[
-- AC_MSG_RESULT([no])
- IBFLAGS=""
- IBNAL=""
- OPENIBNAL=""
- OPENIBCPPFLAGS=""
-- ])
- AC_SUBST(IBNAL)
- AC_SUBST(IBCPPFLAGS)
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
- AC_SUBST(OPENIBCPPFLAGS)
- AC_SUBST(OPENIBNAL)
--
-- # ---------- Red Hat 2.4.18 has iobuf->dovary --------------
-- # But other kernels don't
--
-- AC_MSG_CHECKING([if struct kiobuf has a dovary field])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/iobuf.h>
-- ],[
-- struct kiobuf iobuf;
-- iobuf.dovary = 1;
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
-- ],[
-- AC_MSG_RESULT([no])
- ])
- ])
--
-- # ----------- 2.6.4 no longer has page->list ---------------
-- AC_MSG_CHECKING([if struct page has a list field])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/mm.h>
-- ],[
-- struct page page;
-- &page.list;
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
--
-- # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
-- # This needs to run after we've defined the KCPPFLAGS
--
-- AC_MSG_CHECKING([if task_struct has a sighand field])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/sched.h>
-- ],[
-- struct task_struct p;
-- p.sighand = NULL;
-- ],[
-- AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
--
-- # ---------- 2.4.20 introduced cond_resched --------------
--
-- AC_MSG_CHECKING([if kernel offers cond_resched])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/sched.h>
-- ],[
-- cond_resched();
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
-
- # --------- zap_page_range(vma) --------------------------------
- # Defines ZAP_PAGE_RANGE_VMA when mm.h declares zap_page_range with a
- # struct vm_area_struct argument. grep may print nothing when mm.h
- # cannot be read, so an empty count is treated as zero.
- AC_MSG_CHECKING([if zap_page_range with vma parameter])
- ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h 2>/dev/null`"
- if test "${ZAP_PAGE_RANGE_VMA:-0}" != 0 ; then
- AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- fi
--
-- # ---------- Red Hat 2.4.21 backports some more 2.5 bits --------
--
-- # Defines HAVE_PDE when proc_fs.h contains a PDE definition. grep may
-- # print nothing when the header cannot be read, so an empty count is
-- # treated as zero.
-- AC_MSG_CHECKING([if kernel defines PDE])
-- HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h 2>/dev/null`"
-- if test "${HAVE_PDE:-0}" != 0 ; then
-- AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
-- AC_MSG_RESULT([yes])
-- else
-- AC_MSG_RESULT([no])
-- fi
--
-- # Defines HAVE_DIO_FILE when fs.h declares direct_IO with a struct file
-- # argument. grep may print nothing when the header cannot be read, so
-- # an empty count is treated as zero.
-- AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
-- HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h 2>/dev/null`"
-- if test "${HAVE_DIO_FILE:-0}" != 0 ; then
-- AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
-- AC_MSG_RESULT(yes)
-- else
-- AC_MSG_RESULT(no)
-- fi
--
-- AC_MSG_CHECKING([if kernel defines cpu_online()])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/sched.h>
-- ],[
-- cpu_online(0);
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
-
-- AC_MSG_CHECKING([if kernel defines cpumask_t])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/sched.h>
-- ],[
-- return sizeof (cpumask_t);
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
--
-- # ---------- RHEL kernels define page_count in mm_inline.h
-- AC_MSG_CHECKING([if kernel has mm_inline.h header])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/mm_inline.h>
-- ],[
-- #ifndef page_count
-- #error mm_inline.h does not define page_count
-- #endif
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
--
-- # ---------- inode->i_alloc_sem --------------
-- AC_MSG_CHECKING([if struct inode has i_alloc_sem])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/fs.h>
-- #include <linux/version.h>
-- ],[
-- #if defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24))
-- #error "x86_64 down_read_trylock broken before 2.4.24"
-- #endif
-- struct inode i;
-- return (char *)&i.i_alloc_sem - (char *)&i;
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
-
--
-- # ---------- modules? ------------------------
-- AC_MSG_CHECKING([for module support])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/config.h>
-- ],[
-- #ifndef CONFIG_MODULES
-- #error CONFIG_MODULES not #defined
-- #endif
-- ],[
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no])
-- AC_MSG_ERROR([module support is required to build Lustre kernel modules.])
-- ])
--
-- # ---------- modversions? --------------------
-- AC_MSG_CHECKING([for MODVERSIONS])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/config.h>
-- ],[
-- #ifndef CONFIG_MODVERSIONS
-- #error CONFIG_MODVERSIONS not #defined
-- #endif
-- ],[
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
--
-- # ------------ preempt -----------------------
-- AC_MSG_CHECKING([if preempt is enabled])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/config.h>
-- ],[
-- #ifndef CONFIG_PREEMPT
-- #error CONFIG_PREEMPT is not #defined
-- #endif
-- ],[
-- AC_MSG_RESULT([yes])
-- AC_MSG_ERROR([Lustre does not support kernels with preempt enabled.])
-- ],[
-- AC_MSG_RESULT([no])
-- ])
--
-- case $BACKINGFS in
-- ext3)
-- # --- Check that ext3 and ext3 xattr are enabled in the kernel
-- AC_MSG_CHECKING([that ext3 is enabled in the kernel])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/config.h>
-- ],[
-- #ifndef CONFIG_EXT3_FS
-- #ifndef CONFIG_EXT3_FS_MODULE
-- #error CONFIG_EXT3_FS not #defined
-- #endif
-- #endif
-- ],[
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no])
-- AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel (CONFIG_EXT3_FS)])
-- ])
--
-- AC_MSG_CHECKING([that extended attributes for ext3 are enabled in the kernel])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/config.h>
-- ],[
-- #ifndef CONFIG_EXT3_FS_XATTR
-- #error CONFIG_EXT3_FS_XATTR not #defined
-- #endif
-- ],[
-- AC_MSG_RESULT([yes])
-- ],[
-- AC_MSG_RESULT([no])
-- AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel (CONFIG_EXT3_FS_XATTR.)])
-- AC_MSG_WARN([This build may fail.])
-- ])
-- ;;
-- ldiskfs)
-- AC_MSG_CHECKING([if fshooks are present])
-- LUSTRE_MODULE_TRY_COMPILE(
-- [
-- #include <linux/fshooks.h>
-- ],[],[
-- AC_MSG_RESULT([yes])
-- LDISKFS_SERIES="2.6-suse.series"
-- ],[
-- AC_MSG_RESULT([no])
-- LDISKFS_SERIES="2.6-vanilla.series"
-- ])
-- AC_SUBST(LDISKFS_SERIES)
-- # --- check which ldiskfs series we should use
-- ;;
-- esac # $BACKINGFS
--fi
--
- AM_CONDITIONAL(BUILD_IBNAL, test x$IBNAL = "xibnal")
- AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal")
--AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal")
-AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal")
-AM_CONDITIONAL(BUILD_OPENIBNAL, test x$OPENIBNAL = "xopenibnal")
--
--CPPFLAGS="-include \$(top_builddir)/include/config.h $CPPFLAGS"
--EXTRA_KCFLAGS="-include $PWD/include/config.h $EXTRA_KCFLAGS"
--AC_SUBST(EXTRA_KCFLAGS)
--
--#echo "KCPPFLAGS: $KCPPFLAGS"
--#echo "KCFLAGS: $KCFLAGS"
--#echo "LLCPPFLAGS: $LLCPPFLAGS"
--#echo "LLCFLAGS: $LLCFLAGS"
--#echo "MOD_LINK: $MOD_LINK"
--#echo "CFLAGS: $CFLAGS"
--#echo "CPPFLAGS: $CPPFLAGS"
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--EXTRA_DIST = archdep.m4 build.m4
--
--SUBDIRS = portals libcfs knals unals router tests doc utils include
+++ /dev/null
--#!/bin/sh
--
--aclocal &&
--automake --add-missing &&
--${AUTOCONF:-autoconf}
+++ /dev/null
--# ---------- other tests and settings ---------
--
--AC_CHECK_TYPE([spinlock_t],
-- [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])],
-- [],
-- [#include <linux/spinlock.h>])
--
--# --------- unsigned long long sane? -------
--# The size test quotes the cached value so that an empty result fails the
--# comparison and aborts, instead of making test itself error out and the
--# check pass silently.
--
--AC_CHECK_SIZEOF(unsigned long long, 0)
--echo "---> size SIZEOF $SIZEOF_unsigned_long_long"
--echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long"
--if test "$ac_cv_sizeof_unsigned_long_long" != 8 ; then
-- AC_MSG_ERROR([** we assume that sizeof(long long) == 8. Tell phil@clusterfs.com])
--fi
--
--# directories for binaries
--ac_default_prefix=/usr
--
--# mount.lustre
--rootsbindir='/sbin'
--AC_SUBST(rootsbindir)
-sysconfdir='/etc'
-AC_SUBST(sysconfdir)
--# Directories for documentation and demos.
--docdir='${datadir}/doc/$(PACKAGE)'
--AC_SUBST(docdir)
--demodir='$(docdir)/demo'
--AC_SUBST(demodir)
- pkgexampledir='${pkglibdir}/examples'
-pkgexampledir='${pkgdatadir}/examples'
--AC_SUBST(pkgexampledir)
--pymoddir='${pkglibdir}/python/Lustre'
--AC_SUBST(pymoddir)
--
--# ---------- BAD gcc? ------------
--AC_PROG_RANLIB
--AC_PROG_CC
--AC_MSG_CHECKING([for buggy compiler])
--CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"`
--bad_cc() {
-- AC_MSG_RESULT([buggy compiler found!])
-- echo
-- echo " '$CC_VERSION'"
-- echo " has been known to generate bad code, "
-- echo " please get an updated compiler."
-- AC_MSG_ERROR([sorry])
--}
--TMP_VERSION=`echo $CC_VERSION | cut -c 1-16`
--if test "$TMP_VERSION" = "gcc version 2.95"; then
-- bad_cc
--fi
--case "$CC_VERSION" in
-- # ost_pack_niobuf putting 64bit NTOH temporaries on the stack
-- # without "sub $0xc,%esp" to protect the stack from being
-- # stomped on by interrupts (bug 606)
-- "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)")
-- bad_cc
-- ;;
-- # mandrake's similar sub 0xc compiler bug
-- # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2
-- "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)")
-- bad_cc
-- ;;
-- *)
-- AC_MSG_RESULT([no known problems])
-- ;;
--esac
--# end ------ BAD gcc? ------------
--
--# -------- Check for required packages --------------
--
--# this doesn't seem to work on older autoconf
--# AC_CHECK_LIB(readline, readline,,)
--AC_MSG_CHECKING([for readline support])
--AC_ARG_ENABLE(readline,
-- AC_HELP_STRING([--disable-readline],
-- [do not use readline library]),
-- [],[enable_readline='yes'])
--AC_MSG_RESULT([$enable_readline])
--if test x$enable_readline = xyes ; then
-- LIBREADLINE="-lreadline -lncurses"
-- AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available])
--else
-- LIBREADLINE=""
--fi
--AC_SUBST(LIBREADLINE)
--
--AC_MSG_CHECKING([if efence debugging support is requested])
--AC_ARG_ENABLE(efence,
-- AC_HELP_STRING([--enable-efence],
-- [use efence library]),
-- [],[enable_efence='no'])
--AC_MSG_RESULT([$enable_efence])
--if test "$enable_efence" = "yes" ; then
-- LIBEFENCE="-lefence"
-- AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested])
--else
-- LIBEFENCE=""
--fi
--AC_SUBST(LIBEFENCE)
--
--# -------- enable acceptor libwrap (TCP wrappers) support? -------
--AC_MSG_CHECKING([if libwrap support is requested])
--AC_ARG_ENABLE([libwrap],
-- AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]),
-- [case "${enableval}" in
-- yes) enable_libwrap=yes ;;
-- no) enable_libwrap=no ;;
-- *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;;
-- esac],[enable_libwrap=no])
--AC_MSG_RESULT([$enable_libwrap])
--if test x$enable_libwrap = xyes ; then
-- LIBWRAP="-lwrap"
-- AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested])
--else
-- LIBWRAP=""
--fi
--AC_SUBST(LIBWRAP)
+++ /dev/null
--In this document I will try to draw the data structures and how they
--interrelate in the Portals 3 reference implementation. It is probably
--best shown with a drawing, so there may be an additional xfig or
--Postscript figure.
--
--
--MEMORY POOLS:
--------------
--
--First, a digression on memory allocation in the library. As mentioned
--in the NAL Writer's Guide, the library does not link against any
--standard C libraries and as such is unable to dynamically allocate
--memory on its own. It requires that the NAL implement a method
--for allocation that is appropriate for the protection domain in
--which the library lives. This is only called when a network
--interface is initialized to allocate the Portals object pools.
--
--These pools are preallocated blocks of objects that the library
--can rapidly make active and manage with a minimum of overhead.
--It also cuts down on overhead for setting up structures
--since the NAL->malloc() callback does not need to be called
--for each object.
--
--The objects are maintained on a per-object type singly linked free
--list and contain a pointer to the next free object. This pointer
--is NULL if the object is not on the free list and is non-zero
--if it is on the list. The special sentinel value of 0xDEADBEEF
--is used to mark the end of the free list since NULL could
--indicate that the last object in the list is not free.
--
--When one of the lib_*_alloc() functions is called, the library
--returns the head of the free list and advances the head pointer
--to the next item on the list. The special case of 0xDEADBEEF is
--checked and a NULL pointer is returned if there are no more
--objects of this type available. The lib_*_free() functions
--are even simpler -- check to ensure that the object is not already
--free, set its next pointer to the current head and then set
--the head to be this newly freed object.
--
--Since C does not have templates, I did the next best thing and wrote
--the memory pool allocation code as a macro that expands based on the
--type of the argument. The mk_alloc(T) macro expands to
--write the _lib_T_alloc() and lib_T_free() functions.
--It requires that the object have a pointer of the type T named
--"next_free". There are also functions that map _lib_T_alloc()
--to lib_T_alloc() so that the library can add some extra
--functionality to the T constructor.
--
--
--
--LINKED LISTS:
--------------
--
--Many of the active Portals objects are stored in doubly linked lists
--when they are active. These are always implemented with the pointer
--to the next object and a pointer to the next pointer of the
--previous object. This avoids the "dummy head" object or
--special cases for inserting at the beginning or end of the list.
--The pointer manipulations are a little hairy at times, but
--I hope that they are understandable.
--
--The actual linked list code is implemented as macros in <lib-p30.h>,
--although the object has to know about
--
--
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--LYX2PDF = lyx --export pdf
--LYX2TXT = lyx --export text
--LYX2HTML = lyx --export html
--SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
--
--if DOC
-- DOCS = portals3.pdf
--else
-- DOCS =
--endif
--
--IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
--LYXFILES= portals3.lyx
--
--MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED)
--GENERATED =
--EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES)
--
--all: $(DOCS)
--
--# update date and version in document
--date := $(shell date +%x)
--tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/')
--addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g'
--
--# Regenerate when the $(VERSION) or $Name: $ changes.
--.INTERMEDIATE: $(GENERATED)
--$(GENERATED) : %.lyx: %.lin Makefile
-- $(addversion) $< > $@
--
--.lyx.pdf:
-- @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n"
--
--.lyx.txt:
-- @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n"
--.lyx.html:
-- @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n"
--.fig.eps:
-- -fig2dev -L eps $< > $@
--
--portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx
--
--syncweb: portals3.pdf
--# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf
--# ( cd /usr/src/www ; make lustre ; make synclustre )
--
+++ /dev/null
--This documents the life cycle of a message as it arrives and is handled by
--a basic async, packetized NAL. There are four types of messages that have
--slightly different life cycles, so they are addressed independently.
--
--
--Put request
-------------
--
--1. NAL notices that there is an incoming message header on the network
--and reads a ptl_hdr_t in from the wire.
--
--2. It may store additional NAL specific data that provides context
--for this event in a void* that it will interpret in some fashion
--later.
--
--3. The NAL calls lib_parse() with a pointer to the header and its
--private data structure.
--
--4. The library decodes the header and may build a message state
--object that describes the event to be written and the ACK to be
--sent, if any. It then calls nal->recv() with the private data
--that the NAL passed in, a pointer to the message state object
--and a translated user address.
--
-- The NAL will have been given a chance to pretranslate
-- all user addresses when the buffers are created. This
-- process is described in the NAL-HOWTO.
--
--5. The NAL should restore whatever context it required from the
--private data pointer, begin receiving the bytes and possibly store
--some extra state of its own. It should return at this point.
--
--
--
--Get request
-------------
--
--1. As with a Put, the NAL notices the incoming message header and
--passes it to lib_parse().
--
--2. The library decodes the header and calls nal->recv() with a
--zero byte length, offset and destination to instruct it to clean
--up the wire after reading the header. The private data will
--be passed in as well, allowing the NAL to retrieve any state
--or context that it requires.
--
--3. The library may build a message state object to possibly
--write an event log or invalidate a memory region.
--
--4. The library will build a ptl_msg_t header that specifies the
--Portals protocol information for delivery at the remote end.
--
--5. The library calls nal->send() with the pre-built header,
--the optional message state object, the four part address
--component, a translated user pointer + offset, and some
--other things.
--
--6. The NAL is to put the header on the wire or copy it at
--this point (since it is allocated on the stack). It should store some
--amount of state about its current position in the message and
--the destination address.
--
--7. And then return to the library.
--
--
--Reply request
---------------
--
--1. Starting at "The library decodes the header..."
--
--2. The library decodes the header and calls nal->recv()
--to bring in the rest of the message. Flow continues in
--exactly the same fashion as with all other receives.
--
--
--Ack request
-------------
--
--1. The library decodes the header, builds the appropriate data
--structures for the event in a message state object and calls nal->recv()
--with a zero byte length, etc.
--
--
--Packet arrival
----------------
--
--1. The NAL should notice the arrival of a packet, retrieve whatever
--state it needs from the message ID or other NAL specific header data
--and place the data bytes directly into the user address that was
--given to nal->recv().
--
-- How this happens is outside the scope of the Portals library
-- and solely determined by the NAL...
--
--2. If this is the last packet in a message, the NAL should retrieve
--the lib_msg_t *cookie that it was given in the call to nal->recv()
--and pass it to lib_finalize(). lib_finalize() may call nal->send()
--to send an ACK, nal->write() to record an entry in the event log,
--nal->invalidate() to unregister a region of memory or do nothing at all.
--
--3. It should then clean up any remaining NAL specific state about
--the message and go back into the main loop.
--
--
--Outgoing packets
------------------
--
--1. When the NAL has pending output, it should put the packets on
--the wire wrapped with whatever implementation specified wrappers.
--
--2. Once it has output all the packets of a message it should
--call lib_finalize() with the message state object that was
--handed to nal->send(). This allows the library to clean
--up its state regarding the message and write any pending event
--entries.
--
--
--
+++ /dev/null
--This document is a first attempt at describing how to write a NAL
--for the Portals 3 library. It also defines the library architecture
--and the abstraction of protection domains.
--
--
--First, an overview of the architecture:
--
-- Application
--
------|----+--------
-- |
-- API === NAL (User space)
-- |
-----------+---|-----
-- |
-- LIB === NAL (Library space)
-- |
-----------+---|-----
--
-- Physical wire (NIC space)
--
--
--Application
-- API
--API-side NAL
--------------
--LIB-side NAL
-- LIB
--LIB-side NAL
-- wire
--
--Communication is through the indicated paths via well defined
--interfaces. The API and LIB portions are written to be portable
--across platforms and do not depend on the network interface.
--
--Communication between the application and the API code is
--defined in the Portals 3 API specification. This is the
--user-visible portion of the interface and should be the most
--stable.
--
--
--
--API-side NAL:
--------------
--
--The user space NAL needs to implement only a few functions
--that are stored in a nal_t data structure and called by the
--API-side library:
--
-- int forward( nal_t *nal,
-- int index,
-- void *args,
-- size_t arg_len,
-- void *ret,
-- size_t ret_len
-- );
--
--Most of the data structures in the portals library are held in
--the LIB section of the code, so it is necessary to forward API
--calls across the protection domain to the library. This is
--handled by the NAL's forward method. Once the argument and return
--blocks are on the remote side the NAL should call lib_dispatch()
--to invoke the appropriate API function.
--
-- int validate( nal_t *nal,
-- void *base,
-- size_t extent,
-- void **trans_base,
-- void **trans_data
-- );
--
--The validate method provides a means for the NAL to prevalidate
--and possibly pretranslate user addresses into a form suitable
--for fast use by the network card or kernel module. The trans_base
--pointer will be used by the library every time it needs to
--refer to the block of memory. The trans_data result is a
--cookie that will be handed to the NAL along with the trans_base.
--
--The library never performs calculations on the trans_base value;
--it only computes offsets that are then handed to the NAL.
--
--
-- int shutdown( nal_t *nal, int interface );
--
--Brings down the network interface. The remote NAL side should
--call lib_fini() to bring down the library side of the network.
--
-- void yield( nal_t *nal );
--
--This allows the user application to gracefully give up the processor
--while busy waiting. Performance critical applications may not
--want to take the time to call this function, so it should be an
--option to the PtlEQWait call. Right now it is not implemented as such.
--
--Lastly, the NAL must implement a function named PTL_IFACE_*, where
--* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR.
--This initialization function is to set up communication with the
--library-side NAL, which should call lib_init() to bring up the
--network interface.
--
--
--
--LIB-side NAL:
--------------
--
--On the library-side, the NAL has much more responsibility. It
--is responsible for calling lib_dispatch() on behalf of the user,
--it is also responsible for bringing packets off the wire and
--pushing bits out. As on the user side, the methods are stored
--in a nal_cb_t structure that is defined on a per network
--interface basis.
--
--The calls to lib_dispatch() need to be examined. The prototype:
--
-- void lib_dispatch(
-- nal_cb_t *nal,
-- void *private,
-- int index,
-- void *arg_block,
-- void *ret_block
-- );
--
--has two complications. The private field is a NAL-specific
--value that will be passed to any callbacks produced as a result
--of this API call. Kernel module implementations may use this
--for task structures, or perhaps network card data. It is ignored
--by the library.
--
--Secondly, the arg_block and ret_block must be in the same protection
--domain as the library. The NAL's two halves must communicate the
--sizes and perform the copies. After the call, the buffer pointed
--to by ret_block will be filled in and should be copied back to
--the user space. How this is to be done is NAL specific.
--
-- int lib_parse(
-- nal_cb_t *nal,
-- ptl_hdr_t *hdr,
-- void *private
-- );
--
--This is the only other entry point into the library from the NAL.
--When the NAL detects an incoming message on the wire it should read
--sizeof(ptl_hdr_t) bytes and pass a pointer to the header to
--lib_parse(). It may set private to be anything that it needs to
--tie the incoming message to callbacks that are made as a result
--of this event.
--
--The method calls are:
--
-- int (*send)(
-- nal_cb_t *nal,
-- void *private,
-- lib_msg_t *cookie,
-- ptl_hdr_t *hdr,
-- int nid,
-- int pid,
-- int gid,
-- int rid,
-- user_ptr trans_base,
-- user_ptr trans_data,
-- size_t offset,
-- size_t len
-- );
--
--This is a tricky function -- it must support async output
--of messages as well as properly synchronized event log writing.
--The private field is the same that was passed into lib_dispatch()
--or lib_parse() and may be used to tie this call to the event
--that initiated the entry to the library.
--
--The cookie is a pointer to a library private value that must
--be passed to lib_finalize() once the message has been completely
--sent. It should not be examined by the NAL for any meaning.
--
--The four ID fields are passed in, although some implementations
--may not use all of them.
--
--The single base pointer has been replaced with the translated
--address that the API NAL generated in the api_nal->validate()
--call. The trans_data is unchanged and the offset is in bytes.
--
--
-- int (*recv)(
-- nal_cb_t *nal,
-- void *private,
-- lib_msg_t *cookie,
-- user_ptr trans_base,
-- user_ptr trans_data,
-- size_t offset,
-- size_t mlen,
-- size_t rlen
-- );
--
--This callback will only be called in response to lib_parse().
--The cookie, trans_base and trans_data are as discussed in send().
--The NAL should read mlen bytes from the wire, deposit them into
--trans_base + offset and then discard (rlen - mlen) bytes.
--Once the entire message has been received the NAL should call
--lib_finalize() with the lib_msg_t *cookie.
--
--The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0
--are used to indicate that the NAL should clean up the wire. This could
--be implemented as a blocking call, although having it return as quickly
--as possible is desirable.
--
-- int (*write)(
-- nal_cb_t *nal,
-- void *private,
-- user_ptr trans_addr,
-- user_ptr trans_data,
-- size_t offset,
--
-- void *src_addr,
-- size_t len
-- );
--
--This is essentially a cross-protection domain memcpy(). The user address
--has been pretranslated by the api_nal->validate() call.
--
-- void *(*malloc)(
-- nal_cb_t *nal,
-- size_t len
-- );
--
-- void (*free)(
-- nal_cb_t *nal,
-- void *buf
-- );
--
--Since the NAL may be in a non-standard hosted environment it can
--not call malloc(). This allows the library side NAL to implement
--the system specific malloc(). In the current reference implementation
--the library only calls nal->malloc() when the network interface is
--initialized and then calls free when it is brought down. The library
--maintains its own pool of objects for allocation so only one call to
--malloc is made per object type.
--
-- void (*invalidate)(
-- nal_cb_t *nal,
-- user_ptr trans_base,
-- user_ptr trans_data,
-- size_t extent
-- );
--
--User addresses are validated/translated at the user-level API NAL
--method, which is likely to push them to this level. Meanwhile,
--the library NAL will be notified when the library no longer
--needs the buffer. Overlapped buffers are not detected by the
--library, so the NAL should ref count each page involved.
--
--Unfortunately we have a few bugs when the invalidate method is
--called. It is still in progress...
--
-- void (*printf)(
-- nal_cb_t *nal,
-- const char *fmt,
-- ...
-- );
--
--As with malloc(), the library does not have any way to do printf
--or printk. It is not necessary for the NAL to implement this
--call, although it will make debugging difficult.
--
-- void (*cli)(
-- nal_cb_t *nal,
-- unsigned long *flags
-- );
--
-- void (*sti)(
-- nal_cb_t *nal,
-- unsigned long *flags
-- );
--
--These are used by the library to mark critical sections.
--
-- int (*gidrid2nidpid)(
-- nal_cb_t *nal,
-- ptl_id_t gid,
-- ptl_id_t rid,
-- ptl_id_t *nid,
-- ptl_id_t *pid
-- );
--
--
-- int (*nidpid2gidrid)(
-- nal_cb_t *nal,
-- ptl_id_t nid,
-- ptl_id_t pid,
-- ptl_id_t *gid,
-- ptl_id_t *rid
-- );
--
--Rolf added these. I haven't looked at how they have to work yet.
+++ /dev/null
--#FIG 3.2
--Landscape
--Center
--Inches
--Letter
--100.00
--Single
---2
--1200 2
--6 1200 750 1650 1050
--2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-- 1650 1050 1650 750 1200 750 1200 1050 1650 1050
--4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001
---6
--6 1200 2325 1650 2625
--2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-- 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625
--4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001
---6
--6 1200 1800 1650 2100
--2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-- 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100
--4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001
---6
--6 1200 1275 1650 1575
--2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-- 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575
--4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001
---6
--6 450 750 900 1200
--5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050
--1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 450 825 450 1050
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 1050 900 825
---6
--6 450 2325 900 2775
--5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625
--1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 450 2400 450 2625
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 2625 900 2400
---6
--6 450 1800 900 2250
--5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100
--1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 450 1875 450 2100
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 2100 900 1875
---6
--6 450 1275 900 1725
--5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575
--1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 450 1350 450 1575
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 1575 900 1350
---6
--6 2250 750 3450 2625
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-- 2550 1200 3150 1200
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-- 2550 1500 3150 1500
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-- 2550 1800 3150 1800
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-- 2550 2100 3150 2100
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2550 975 3150 975 3150 2625 2550 2625 2550 975
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-- 2550 2400 3150 2400
--4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001
---6
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-- 0 0 1.00 60.00 120.00
-- 0 0 1.00 60.00 120.00
-- 1650 2400 2550 1350
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-- 0 0 1.00 60.00 120.00
-- 0 0 1.00 60.00 120.00
-- 1650 1875 2550 1050
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-- 0 0 1.00 60.00 120.00
-- 0 0 1.00 60.00 120.00
-- 1650 1425 2550 1950
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-- 0 0 1.00 60.00 120.00
-- 0 0 1.00 60.00 120.00
-- 1650 900 2550 1650
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 900 1200 900
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 1425 1200 1425
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 1950 1200 1950
--2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-- 900 2475 1200 2475
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-- 0 0 1.00 60.00 120.00
-- 0 0 1.00 60.00 120.00
-- 1650 2025 2550 2250
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-- 0 0 1.00 60.00 120.00
-- 0 0 1.00 60.00 120.00
-- 1650 2550 2550 2475
--2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-- 1875 2850 1875 600 225 600 225 2850 1875 2850
--4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001
+++ /dev/null
--#FIG 3.2
--Landscape
--Center
--Inches
--Letter
--100.00
--Single
---2
--1200 2
--6 525 2175 1575 2925
--6 675 2287 1425 2812
--4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001
--4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001
--4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001
---6
--2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-- 1575 2550 1050 2175 525 2550 1050 2925 1575 2550
---6
--6 3450 1275 4350 1725
--6 3600 1312 4200 1687
--4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001
--4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275
---6
--6 4650 1275 5550 1725
--6 4725 1312 5475 1687
--4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001
--4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275
---6
--6 1350 525 2250 975
--6 1350 562 2250 937
--4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001
--4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1350 525 2250 525 2250 975 1350 975 1350 525
---6
--6 525 1125 1575 1875
--2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-- 1575 1500 1050 1125 525 1500 1050 1875 1575 1500
--4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001
---6
--6 2340 1237 2940 1687
--6 2340 1237 2940 1687
--4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001
--4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001
--4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001
---6
---6
--6 525 3225 1575 3975
--6 675 3375 1425 3750
--4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001
--4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001
---6
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
-- 525 3600 1050 3225 1575 3600 1050 3975 525 3600
---6
--6 3300 3375 4350 3825
--6 3300 3412 4350 3787
--4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001
--4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375
---6
--6 1950 3225 3000 3975
--6 2250 3450 2700 3750
--4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001
--4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001
---6
--2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-- 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600
---6
--6 3150 4500 4200 4950
--6 3150 4537 4200 4912
--4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001
--4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500
---6
--6 600 4500 1500 4950
--6 675 4537 1425 4912
--4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001
--4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 600 4500 1500 4500 1500 4950 600 4950 600 4500
---6
--6 4650 4350 5700 5100
--6 4950 4537 5400 4912
--6 4950 4537 5400 4912
--4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001
--4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001
---6
---6
--2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-- 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725
---6
--6 6000 4500 6900 4950
--6 6225 4575 6675 4875
--4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001
--4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001
---6
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500
---6
--6 1800 4350 2850 5100
--6 2100 4575 2550 4875
--4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001
--4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001
---6
--2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-- 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725
---6
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1050 1875 1050 2175
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1575 1500 2100 1500
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1050 450 1050 1125
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1350 750 1050 750
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1050 2925 1050 3225
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3150 1500 3450 1500
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 4350 1500 4650 1500
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
-- 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1575 3600 1950 3600
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1050 3975 1050 4500
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3000 3600 3300 3600
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1500 4725 1800 4725
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 5700 4725 6000 4725
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 2850 4725 3150 4725
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 4200 4725 4650 4725
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 6900 4725 7950 4725
--3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
-- 0 0 1.00 60.00 120.00
-- 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
-- 0 0 1.00 60.00 120.00
-- 2250 750 2475 750 2625 750 2625 900 2625 1125
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
-- 0 0 1.00 60.00 120.00
-- 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
-- 0 0 1.00 60.00 120.00
-- 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
-- 0 0 1.00 60.00 120.00
-- 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
-- 0 0 1.00 60.00 120.00
-- 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125
-- 4425 4275 4425 4725
-- 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
-- 0 0 1.00 60.00 120.00
-- 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125
-- 7275 4275 7275 4725
-- 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
--4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001
--4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001
--4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001
--4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001
--4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001
--4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001
--4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001
--4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001
--4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001
--4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001
--4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001
--4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001
--4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001
--4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001
--4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001
--4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001
+++ /dev/null
--#FIG 3.2
--Landscape
--Center
--Inches
--Letter
--100.00
--Single
---2
--1200 2
--6 2775 900 3525 1200
--4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001
--4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001
---6
--6 1350 1725 2175 2025
--4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001
--4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001
---6
--2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 900 525 2700 750
--2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 2700 825 2700 1275
--2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2
-- 0 0 1.00 60.00 120.00
-- 2700 1350 900 1950
--2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
-- 2400 300 3600 300 3600 2250 2400 2250 2400 300
--2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
-- 0 300 1200 300 1200 2250 0 2250 0 300
--4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001
--4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
--4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
+++ /dev/null
--% ---------------------------------------------------------------
--%
--% by Paolo.Ienne@di.epfl.ch
--%
--% ---------------------------------------------------------------
--%
--% no guarantee is given that the format corresponds perfectly to
--% IEEE 8.5" x 11" Proceedings, but most features should be ok.
--%
--% ---------------------------------------------------------------
--%
--% `ieee' from BibTeX standard bibliography style `abbrv'
--% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09.
--% Copyright (C) 1985, all rights reserved.
--% Copying of this file is authorized only if either
--% (1) you make absolutely no changes to your copy, including name, or
--% (2) if you do make changes, you name it something other than
--% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
--% This restriction helps ensure that all standard styles are identical.
--% The file btxbst.doc has the documentation for this style.
--
--% Declares, in order: the database fields an entry may carry, the
--% per-entry integer variables (none), and the per-entry string
--% variables (just `label').
--ENTRY
-- { address
-- author
-- booktitle
-- chapter
-- edition
-- editor
-- howpublished
-- institution
-- journal
-- key
-- month
-- note
-- number
-- organization
-- pages
-- publisher
-- school
-- series
-- title
-- type
-- volume
-- year
-- }
-- {}
-- { label }
--
--% output.state records where the writer currently is; the other four
--% integers are the symbolic values it may take.
--INTEGERS { output.state before.all mid.sentence after.sentence after.block }
--
--% Assigns the numeric values 0..3 to the four output-state constants.
--FUNCTION {init.state.consts}
--{ #0 'before.all :=
-- #1 'mid.sentence :=
-- #2 'after.sentence :=
-- #3 'after.block :=
--}
--
--% Scratch string variables used by several functions in this file.
--STRINGS { s t }
--
--% Stack on entry: <pending string> <new string>.
--% Writes the pending string followed by the separator that output.state
--% calls for (", " mid-sentence; period + newline + "\newblock " after a
--% block; nothing before any output; period + space otherwise), then
--% leaves the new string on the stack as the next pending item.
--% In every state except mid.sentence the state is reset to mid.sentence.
--FUNCTION {output.nonnull}
--{ 's :=
-- output.state mid.sentence =
-- { ", " * write$ }
-- { output.state after.block =
-- { add.period$ write$
-- newline$
-- "\newblock " write$
-- }
-- { output.state before.all =
-- 'write$
-- { add.period$ " " * write$ }
-- if$
-- }
-- if$
-- mid.sentence 'output.state :=
-- }
-- if$
-- s
--}
--
--% Emits the string on top of the stack via output.nonnull, or silently
--% discards it when empty$ reports it as empty.
--FUNCTION {output}
--{ duplicate$ empty$
-- 'pop$
-- 'output.nonnull
-- if$
--}
--
--% Stack on entry: <string> <field description>.
--% Like output, but an empty string produces the warning
--% "empty <description> in <cite key>" instead of being dropped silently.
--FUNCTION {output.check}
--{ 't :=
-- duplicate$ empty$
-- { pop$ "empty " t * " in " * cite$ * warning$ }
-- 'output.nonnull
-- if$
--}
--
--% Starts a new entry: writes "\bibitem{<cite key>}" on its own line,
--% pushes an empty pending string and resets output.state to before.all.
--FUNCTION {output.bibitem}
--{ newline$
-- "\bibitem{" write$
-- cite$ write$
-- "}" write$
-- newline$
-- ""
-- before.all 'output.state :=
--}
--
--% Finishes an entry: applies add.period$ to the pending string, writes
--% it and ends the line.
--FUNCTION {fin.entry}
--{ add.period$
-- write$
-- newline$
--}
--
--% Marks the start of a new block by setting output.state to after.block,
--% unless nothing has been output yet (state before.all).
--FUNCTION {new.block}
--{ output.state before.all =
-- 'skip$
-- { after.block 'output.state := }
-- if$
--}
--
--% Marks the start of a new sentence by setting output.state to
--% after.sentence; does nothing when the state is already after.block
--% or still before.all.
--FUNCTION {new.sentence}
--{ output.state after.block =
-- 'skip$
-- { output.state before.all =
-- 'skip$
-- { after.sentence 'output.state := }
-- if$
-- }
-- if$
--}
--
--% Logical negation: pops a truth value and pushes #0 if it was true,
--% #1 otherwise.
--FUNCTION {not}
--{ { #0 }
-- { #1 }
-- if$
--}
--
--% Logical conjunction of the two top stack values: if the top one is
--% true the other is left as the result, otherwise the other is replaced
--% by #0.
--FUNCTION {and}
--{ 'skip$
-- { pop$ #0 }
-- if$
--}
--
--% Logical disjunction of the two top stack values: if the top one is
--% true the other is replaced by #1, otherwise the other is left as the
--% result.
--FUNCTION {or}
--{ { pop$ #1 }
-- 'skip$
-- if$
--}
--
--% Pops one string and calls new.block unless that string is empty.
--FUNCTION {new.block.checka}
--{ empty$
-- 'skip$
-- 'new.block
-- if$
--}
--
--% Pops two strings and calls new.block unless both of them are empty.
--FUNCTION {new.block.checkb}
--{ empty$
-- swap$ empty$
-- and
-- 'skip$
-- 'new.block
-- if$
--}
--
--% Pops one string and calls new.sentence unless that string is empty.
--FUNCTION {new.sentence.checka}
--{ empty$
-- 'skip$
-- 'new.sentence
-- if$
--}
--
--% Pops two strings and calls new.sentence unless both of them are empty.
--FUNCTION {new.sentence.checkb}
--{ empty$
-- swap$ empty$
-- and
-- 'skip$
-- 'new.sentence
-- if$
--}
--
--% Replaces the top of the stack with the null string "" when empty$
--% reports it as empty; otherwise leaves it untouched.
--FUNCTION {field.or.null}
--{ duplicate$ empty$
-- { pop$ "" }
-- 'skip$
-- if$
--}
--
--% Wraps the top-of-stack string in "{\em ...}"; an empty string becomes
--% "" with no markup.
--FUNCTION {emphasize}
--{ duplicate$ empty$
-- { pop$ "" }
-- { "{\em " swap$ * "}" * }
-- if$
--}
--
--% Loop counters for format.names: index of the current name, number of
--% names still to process, and the total number of names.
--INTEGERS { nameptr namesleft numnames }
--
--% Pops a name-list string and pushes the formatted list.
--% Each name is rendered with the pattern "{f.~}{vv~}{ll}{, jj}".
--% Names are joined with ", "; before the last name a "," is added only
--% when there are more than two names, followed by " and <name>", or by
--% " et~al." when the last name is the literal "others".
--FUNCTION {format.names}
--{ 's :=
-- #1 'nameptr :=
-- s num.names$ 'numnames :=
-- numnames 'namesleft :=
-- { namesleft #0 > }
-- { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
-- nameptr #1 >
-- { namesleft #1 >
-- { ", " * t * }
-- { numnames #2 >
-- { "," * }
-- 'skip$
-- if$
-- t "others" =
-- { " et~al." * }
-- { " and " * t * }
-- if$
-- }
-- if$
-- }
-- 't
-- if$
-- nameptr #1 + 'nameptr :=
-- namesleft #1 - 'namesleft :=
-- }
-- while$
--}
--
--% Pushes the formatted author list, or "" when the author field is empty.
--FUNCTION {format.authors}
--{ author empty$
-- { "" }
-- { author format.names }
-- if$
--}
--
--% Pushes the formatted editor list followed by ", editor" (one name) or
--% ", editors" (several names); pushes "" when the editor field is empty.
--FUNCTION {format.editors}
--{ editor empty$
-- { "" }
-- { editor format.names
-- editor num.names$ #1 >
-- { ", editors" * }
-- { ", editor" * }
-- if$
-- }
-- if$
--}
--
--% Pushes the title converted with change.case$ in "t" mode, or "" when
--% the title field is empty.
--FUNCTION {format.title}
--{ title empty$
-- { "" }
-- { title "t" change.case$ }
-- if$
--}
--
--% Pops a string and pushes a copy in which every isolated "-" has been
--% doubled to "--". Runs of two or more hyphens are copied unchanged, and
--% all other characters are copied one at a time.
--% Uses t as the not-yet-processed remainder of the input.
--FUNCTION {n.dashify}
--{ 't :=
-- ""
-- { t empty$ not }
-- { t #1 #1 substring$ "-" =
-- { t #1 #2 substring$ "--" = not
-- { "--" *
-- t #2 global.max$ substring$ 't :=
-- }
-- { { t #1 #1 substring$ "-" = }
-- { "-" *
-- t #2 global.max$ substring$ 't :=
-- }
-- while$
-- }
-- if$
-- }
-- { t #1 #1 substring$ *
-- t #2 global.max$ substring$ 't :=
-- }
-- if$
-- }
-- while$
--}
--
--% Pushes the date string: "<month> <year>" when both are present, the
--% year alone when there is no month, and "" when both are empty.
--% A month without a year is still returned, but a warning is issued.
--FUNCTION {format.date}
--{ year empty$
-- { month empty$
-- { "" }
-- { "there's a month but no year in " cite$ * warning$
-- month
-- }
-- if$
-- }
-- { month empty$
-- 'year
-- { month " " * year * }
-- if$
-- }
-- if$
--}
--
--% Pushes the title passed through emphasize (used for book-like titles).
--FUNCTION {format.btitle}
--{ title emphasize
--}
--
--% Stack on entry: <first> <second>. Pushes <first><sep><second>, where
--% <sep> is a tie "~" if <second> is shorter than three text characters
--% and an ordinary space otherwise.
--FUNCTION {tie.or.space.connect}
--{ duplicate$ text.length$ #3 <
-- { "~" }
-- { " " }
-- if$
-- swap$ * *
--}
--
--% Stack on entry: <description of field pair> <field value>.
--% Warns "can't use both <description> fields in <cite key>" when the
--% field value is non-empty; otherwise just discards the description.
--FUNCTION {either.or.check}
--{ empty$
-- 'pop$
-- { "can't use both " swap$ * " fields in " * cite$ * warning$ }
-- if$
--}
--
--% Pushes "volume <volume>", extended by " of <emphasized series>" when a
--% series is given; pushes "" when there is no volume.
--% Also warns if both volume and number are set.
--FUNCTION {format.bvolume}
--{ volume empty$
-- { "" }
-- { "volume" volume tie.or.space.connect
-- series empty$
-- 'skip$
-- { " of " * series emphasize * }
-- if$
-- "volume and number" number either.or.check
-- }
-- if$
--}
--
--% Only produces output when there is no volume (otherwise pushes "").
--% With no number it pushes the series (or ""). With a number it pushes
--% "number <n>" (capitalised as "Number" unless mid-sentence) followed by
--% " in <series>"; a number without a series triggers a warning.
--FUNCTION {format.number.series}
--{ volume empty$
-- { number empty$
-- { series field.or.null }
-- { output.state mid.sentence =
-- { "number" }
-- { "Number" }
-- if$
-- number tie.or.space.connect
-- series empty$
-- { "there's a number but no series in " cite$ * warning$ }
-- { " in " * series * }
-- if$
-- }
-- if$
-- }
-- { "" }
-- if$
--}
--
--% Pushes "<edition> edition", or "" when the edition field is empty.
--% The edition text is case-converted with "l" mode mid-sentence and with
--% "t" mode otherwise.
--FUNCTION {format.edition}
--{ edition empty$
-- { "" }
-- { output.state mid.sentence =
-- { edition "l" change.case$ " edition" * }
-- { edition "t" change.case$ " edition" * }
-- if$
-- }
-- if$
--}
--
--% Result flag for multi.page.check.
--INTEGERS { multiresult }
--
--% Pops a pages string and pushes #1 if it contains any of the characters
--% "-", "," or "+" (i.e. it looks like more than one page), else #0.
--% Scans from the left, consuming t one character at a time, and stops
--% at the first match.
--FUNCTION {multi.page.check}
--{ 't :=
-- #0 'multiresult :=
-- { multiresult not
-- t empty$ not
-- and
-- }
-- { t #1 #1 substring$
-- duplicate$ "-" =
-- swap$ duplicate$ "," =
-- swap$ "+" =
-- or or
-- { #1 'multiresult := }
-- { t #2 global.max$ substring$ 't := }
-- if$
-- }
-- while$
-- multiresult
--}
--
--% Pushes "pages <range>" (with hyphens normalised by n.dashify) when
--% multi.page.check reports several pages, "page <n>" for a single page,
--% and "" when the pages field is empty.
--FUNCTION {format.pages}
--{ pages empty$
-- { "" }
-- { pages multi.page.check
-- { "pages" pages n.dashify tie.or.space.connect }
-- { "page" pages tie.or.space.connect }
-- if$
-- }
-- if$
--}
--
--% Builds the "<volume>(<number>):<pages>" string for journal articles.
--% Starts from the volume (or ""), appends "(<number>)" when a number is
--% present (warning if there is no volume), and then adds the pages:
--% as ":<pages>" after a non-empty prefix, or via format.pages when
--% nothing has been accumulated so far.
--FUNCTION {format.vol.num.pages}
--{ volume field.or.null
-- number empty$
-- 'skip$
-- { "(" number * ")" * *
-- volume empty$
-- { "there's a number but no volume in " cite$ * warning$ }
-- 'skip$
-- if$
-- }
-- if$
-- pages empty$
-- 'skip$
-- { duplicate$ empty$
-- { pop$ format.pages }
-- { ":" * pages n.dashify * }
-- if$
-- }
-- if$
--}
--
--% Without a chapter this is just format.pages. With a chapter it pushes
--% "<type> <chapter>", where <type> is the lower-cased type field or the
--% default "chapter", followed by ", <formatted pages>" when pages exist.
--FUNCTION {format.chapter.pages}
--{ chapter empty$
-- 'format.pages
-- { type empty$
-- { "chapter" }
-- { type "l" change.case$ }
-- if$
-- chapter tie.or.space.connect
-- pages empty$
-- 'skip$
-- { ", " * format.pages * }
-- if$
-- }
-- if$
--}
--
--% Pushes "In <emphasized booktitle>", with "<editors>, " inserted after
--% "In " when an editor is given; pushes "" when booktitle is empty.
--FUNCTION {format.in.ed.booktitle}
--{ booktitle empty$
-- { "" }
-- { editor empty$
-- { "In " booktitle emphasize * }
-- { "In " format.editors * ", " * booktitle emphasize * }
-- if$
-- }
-- if$
--}
--
--% Warns "all relevant fields are empty in <cite key>" when author,
--% title, howpublished, month, year and note are all empty while the key
--% field is not.
--FUNCTION {empty.misc.check}
--{ author empty$ title empty$ howpublished empty$
-- month empty$ year empty$ note empty$
-- and and and and and
-- key empty$ not and
-- { "all relevant fields are empty in " cite$ * warning$ }
-- 'skip$
-- if$
--}
--
--% Expects a default thesis-type string on the stack. If the type field
--% is set, the default is replaced by the type field converted with
--% change.case$ in "t" mode; otherwise the default is left in place.
--FUNCTION {format.thesis.type}
--{ type empty$
-- 'skip$
-- { pop$
-- type "t" change.case$
-- }
-- if$
--}
--
--% Pushes the report designation: the type field, or "Technical Report"
--% when type is empty. With a number it becomes "<type> <number>";
--% without one the type string is converted with change.case$ "t" mode.
--FUNCTION {format.tr.number}
--{ type empty$
-- { "Technical Report" }
-- 'type
-- if$
-- number empty$
-- { "t" change.case$ }
-- { number tie.or.space.connect }
-- if$
--}
--
--% Pushes the cross-reference text for an article:
--% "In <key>" if a key is given, else "In {\em <journal>\/}", else ""
--% with a warning; in every case " \cite{<crossref>}" is appended.
--FUNCTION {format.article.crossref}
--{ key empty$
-- { journal empty$
-- { "need key or journal for " cite$ * " to crossref " * crossref *
-- warning$
-- ""
-- }
-- { "In {\em " journal * "\/}" * }
-- if$
-- }
-- { "In " key * }
-- if$
-- " \cite{" * crossref * "}" *
--}
--
--% Pushes a short editor designation for cross-references: the first
--% editor formatted as "{vv~}{ll}", followed by " et~al." when there are
--% more than two editors, nothing when there is exactly one, and for
--% exactly two either " and <second editor>" or " et~al." if the second
--% name is the literal "others".
--FUNCTION {format.crossref.editor}
--{ editor #1 "{vv~}{ll}" format.name$
-- editor num.names$ duplicate$
-- #2 >
-- { pop$ " et~al." * }
-- { #2 <
-- 'skip$
-- { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
-- { " et~al." * }
-- { " and " * editor #2 "{vv~}{ll}" format.name$ * }
-- if$
-- }
-- if$
-- }
-- if$
--}
--
--% Pushes the cross-reference text for a book.
--% Prefix: "Volume <volume> of ", or "In " plus a warning when the volume
--% is empty. Then the referenced work is named by the editor (via
--% format.crossref.editor) unless the editor is empty or identical to the
--% author, in which case the key is used, else the emphasized series,
--% else nothing with a warning. " \cite{<crossref>}" is always appended.
--FUNCTION {format.book.crossref}
--{ volume empty$
-- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
-- "In "
-- }
-- { "Volume" volume tie.or.space.connect
-- " of " *
-- }
-- if$
-- editor empty$
-- editor field.or.null author field.or.null =
-- or
-- { key empty$
-- { series empty$
-- { "need editor, key, or series for " cite$ * " to crossref " *
-- crossref * warning$
-- "" *
-- }
-- { "{\em " * series * "\/}" * }
-- if$
-- }
-- { key * }
-- if$
-- }
-- { format.crossref.editor * }
-- if$
-- " \cite{" * crossref * "}" *
--}
--
--% format.incoll.inproc.crossref: push the reference text used by
--% incollection / inproceedings entries that cross-reference another entry.
--%   editor empty, or editor equal to author:
--%       "In <key>" if key is set, else "In {\em <booktitle>\/}" if
--%       booktitle is set, else a warning and ""
--%   otherwise: "In " followed by format.crossref.editor
--% Finally " \cite{<crossref>}" is appended.
--FUNCTION {format.incoll.inproc.crossref}
--{ editor empty$
-- editor field.or.null author field.or.null =
-- or
-- { key empty$
-- { booktitle empty$
-- { "need editor, key, or booktitle for " cite$ * " to crossref " *
-- crossref * warning$
-- ""
-- }
-- { "In {\em " booktitle * "\/}" * }
-- if$
-- }
-- { "In " key * }
-- if$
-- }
-- { "In " format.crossref.editor * }
-- if$
-- " \cite{" * crossref * "}" *
--}
--
--% Entry type @article. Emits, in order: bibitem header, authors, title,
--% then either journal (emphasized) / volume-number-pages / date when
--% there is no crossref, or the article cross-reference followed by the
--% pages when there is one; finally the note.
--% The output*, new.block and fin.entry helpers are defined outside this
--% section of the file.
--FUNCTION {article}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.title "title" output.check
-- new.block
-- crossref missing$
-- { journal emphasize "journal" output.check
-- format.vol.num.pages output
-- format.date "year" output.check
-- }
-- { format.article.crossref output.nonnull
-- format.pages output
-- }
-- if$
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @book. Lead name is the editors when author is empty,
--% otherwise the authors; in the latter case, and only without a crossref,
--% either.or.check is run on the editor field (defined elsewhere;
--% presumably it complains when both author and editor are given).
--% Then: book title; without crossref -> volume, number/series, publisher,
--% address; with crossref -> the book cross-reference. Edition, date and
--% note close the entry.
--FUNCTION {book}
--{ output.bibitem
-- author empty$
-- { format.editors "author and editor" output.check }
-- { format.authors output.nonnull
-- crossref missing$
-- { "author and editor" editor either.or.check }
-- 'skip$
-- if$
-- }
-- if$
-- new.block
-- format.btitle "title" output.check
-- crossref missing$
-- { format.bvolume output
-- new.block
-- format.number.series output
-- new.sentence
-- publisher "publisher" output.check
-- address output
-- }
-- { new.block
-- format.book.crossref output.nonnull
-- }
-- if$
-- format.edition output
-- format.date "year" output.check
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @booklet. Emits authors, title, howpublished, address, date
--% and note. Only the title goes through output.check; the other fields
--% use plain output. new.block.checkb (defined elsewhere) is called with
--% howpublished and address before those two fields are written.
--FUNCTION {booklet}
--{ output.bibitem
-- format.authors output
-- new.block
-- format.title "title" output.check
-- howpublished address new.block.checkb
-- howpublished output
-- address output
-- format.date output
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @inbook. Same skeleton as the book entry, with the
--% chapter/pages string (format.chapter.pages, checked as
--% "chapter and pages") added: after the volume when there is no
--% crossref, or before the book cross-reference when there is one.
--FUNCTION {inbook}
--{ output.bibitem
-- author empty$
-- { format.editors "author and editor" output.check }
-- { format.authors output.nonnull
-- crossref missing$
-- { "author and editor" editor either.or.check }
-- 'skip$
-- if$
-- }
-- if$
-- new.block
-- format.btitle "title" output.check
-- crossref missing$
-- { format.bvolume output
-- format.chapter.pages "chapter and pages" output.check
-- new.block
-- format.number.series output
-- new.sentence
-- publisher "publisher" output.check
-- address output
-- }
-- { format.chapter.pages "chapter and pages" output.check
-- new.block
-- format.book.crossref output.nonnull
-- }
-- if$
-- format.edition output
-- format.date "year" output.check
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @incollection. Emits authors and title, then either
--%   no crossref: "In <editors>, <booktitle>", volume, number/series,
--%                chapter/pages, publisher, address, edition, date
--%   crossref:    the incollection cross-reference and chapter/pages
--% followed by the note.
--FUNCTION {incollection}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.title "title" output.check
-- new.block
-- crossref missing$
-- { format.in.ed.booktitle "booktitle" output.check
-- format.bvolume output
-- format.number.series output
-- format.chapter.pages output
-- new.sentence
-- publisher "publisher" output.check
-- address output
-- format.edition output
-- format.date "year" output.check
-- }
-- { format.incoll.inproc.crossref output.nonnull
-- format.chapter.pages output
-- }
-- if$
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @inproceedings. Emits authors and title, then either
--%   no crossref: "In <editors>, <booktitle>", volume, number/series,
--%                pages, and then
--%                  address empty -> organization, publisher, date
--%                  address set   -> address, date, new sentence,
--%                                   organization, publisher
--%   crossref:    the inproceedings cross-reference and pages
--% followed by the note.
--FUNCTION {inproceedings}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.title "title" output.check
-- new.block
-- crossref missing$
-- { format.in.ed.booktitle "booktitle" output.check
-- format.bvolume output
-- format.number.series output
-- format.pages output
-- address empty$
-- { organization publisher new.sentence.checkb
-- organization output
-- publisher output
-- format.date "year" output.check
-- }
-- { address output.nonnull
-- format.date "year" output.check
-- new.sentence
-- organization output
-- publisher output
-- }
-- if$
-- }
-- { format.incoll.inproc.crossref output.nonnull
-- format.pages output
-- }
-- if$
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @conference: handled exactly like @inproceedings.
--FUNCTION {conference} { inproceedings }
--
--% Entry type @manual.
--% Lead: the authors when author is set; otherwise the organization
--% followed by the address (nothing when organization is empty too).
--% After the title the remaining organization/address are placed so they
--% are not written twice:
--%   author empty, organization empty -> address only
--%   author empty, organization set   -> nothing (already used as lead)
--%   author set                       -> organization, address
--% Edition, date and note close the entry.
--FUNCTION {manual}
--{ output.bibitem
-- author empty$
-- { organization empty$
-- 'skip$
-- { organization output.nonnull
-- address output
-- }
-- if$
-- }
-- { format.authors output.nonnull }
-- if$
-- new.block
-- format.btitle "title" output.check
-- author empty$
-- { organization empty$
-- { address new.block.checka
-- address output
-- }
-- 'skip$
-- if$
-- }
-- { organization address new.block.checkb
-- organization output
-- address output
-- }
-- if$
-- format.edition output
-- format.date output
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @mastersthesis. Emits authors, title (format.title), the
--% thesis label, school, address, date and note. The label defaults to
--% "Master's thesis"; format.thesis.type swaps in the type field when set.
--FUNCTION {mastersthesis}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.title "title" output.check
-- new.block
-- "Master's thesis" format.thesis.type output.nonnull
-- school "school" output.check
-- address output
-- format.date "year" output.check
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @misc (also the fallback, see default.type). Every field is
--% optional: authors, title, howpublished, date and note are written with
--% plain output. After the entry is finished, empty.misc.check is run to
--% warn about entries with no usable fields.
--FUNCTION {misc}
--{ output.bibitem
-- format.authors output
-- title howpublished new.block.checkb
-- format.title output
-- howpublished new.block.checka
-- howpublished output
-- format.date output
-- new.block
-- note output
-- fin.entry
-- empty.misc.check
--}
--
--% Entry type @phdthesis. Same layout as mastersthesis, except that the
--% title is formatted with format.btitle and the default label is
--% "PhD thesis" (replaced by the type field when set).
--FUNCTION {phdthesis}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.btitle "title" output.check
-- new.block
-- "PhD thesis" format.thesis.type output.nonnull
-- school "school" output.check
-- address output
-- format.date "year" output.check
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @proceedings.
--% Lead: the organization when editor is empty, otherwise the editors.
--% Then title, volume and number/series, followed by:
--%   address empty -> organization (only when editor is set, since it was
--%                    otherwise already used as lead), publisher, date
--%   address set   -> address, date, new sentence, organization (again
--%                    only when editor is set), publisher
--% The note closes the entry.
--FUNCTION {proceedings}
--{ output.bibitem
-- editor empty$
-- { organization output }
-- { format.editors output.nonnull }
-- if$
-- new.block
-- format.btitle "title" output.check
-- format.bvolume output
-- format.number.series output
-- address empty$
-- { editor empty$
-- { publisher new.sentence.checka }
-- { organization publisher new.sentence.checkb
-- organization output
-- }
-- if$
-- publisher output
-- format.date "year" output.check
-- }
-- { address output.nonnull
-- format.date "year" output.check
-- new.sentence
-- editor empty$
-- 'skip$
-- { organization output }
-- if$
-- publisher output
-- }
-- if$
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @techreport. Emits authors, title, the report designation
--% from format.tr.number, institution, address, date and note.
--FUNCTION {techreport}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.title "title" output.check
-- new.block
-- format.tr.number output.nonnull
-- institution "institution" output.check
-- address output
-- format.date "year" output.check
-- new.block
-- note output
-- fin.entry
--}
--
--% Entry type @unpublished. Emits authors, title, note and date. Unlike
--% most other entry types the note goes through output.check here, while
--% the date is written with plain output.
--FUNCTION {unpublished}
--{ output.bibitem
-- format.authors "author" output.check
-- new.block
-- format.title "title" output.check
-- new.block
-- note "note" output.check
-- format.date output
-- fin.entry
--}
--
--% Fallback for entry types without their own function: format as @misc.
--FUNCTION {default.type} { misc }
--
--% Predefined string macros usable unquoted in .bib files.
--% First group: month names (jan .. dec), abbreviated except for
--% May, June and July. Second group: abbreviated journal names.
--MACRO {jan} {"Jan."}
--
--MACRO {feb} {"Feb."}
--
--MACRO {mar} {"Mar."}
--
--MACRO {apr} {"Apr."}
--
--MACRO {may} {"May"}
--
--MACRO {jun} {"June"}
--
--MACRO {jul} {"July"}
--
--MACRO {aug} {"Aug."}
--
--MACRO {sep} {"Sept."}
--
--MACRO {oct} {"Oct."}
--
--MACRO {nov} {"Nov."}
--
--MACRO {dec} {"Dec."}
--
--MACRO {acmcs} {"ACM Comput. Surv."}
--
--MACRO {acta} {"Acta Inf."}
--
--MACRO {cacm} {"Commun. ACM"}
--
--MACRO {ibmjrd} {"IBM J. Res. Dev."}
--
--MACRO {ibmsj} {"IBM Syst.~J."}
--
--MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
--
--MACRO {ieeetc} {"IEEE Trans. Comput."}
--
--MACRO {ieeetcad}
-- {"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
--
--MACRO {ipl} {"Inf. Process. Lett."}
--
--MACRO {jacm} {"J.~ACM"}
--
--MACRO {jcss} {"J.~Comput. Syst. Sci."}
--
--MACRO {scp} {"Sci. Comput. Programming"}
--
--MACRO {sicomp} {"SIAM J. Comput."}
--
--MACRO {tocs} {"ACM Trans. Comput. Syst."}
--
--MACRO {tods} {"ACM Trans. Database Syst."}
--
--MACRO {tog} {"ACM Trans. Gr."}
--
--MACRO {toms} {"ACM Trans. Math. Softw."}
--
--MACRO {toois} {"ACM Trans. Office Inf. Syst."}
--
--MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
--
--MACRO {tcs} {"Theoretical Comput. Sci."}
--
--% Read the database entries; everything below operates on them.
--READ
--
--% sortify: normalize the string on top of the stack for use in a sort
--% key by applying purify$ and then converting it to lower case.
--FUNCTION {sortify}
--{ purify$
-- "l" change.case$
--}
--
--% len: scratch integer holding the prefix length for chop.word.
--INTEGERS { len }
--
--% chop.word: stack on entry (bottom to top) is  word  length  string.
--% If the first `length' characters of string equal word, push string with
--% that prefix removed; otherwise push string unchanged.
--FUNCTION {chop.word}
--{ 's :=
-- 'len :=
-- s #1 len substring$ =
-- { s len #1 + global.max$ substring$ }
-- 's
-- if$
--}
--
--% sort.format.names: turn the name list on top of the stack into a sort
--% string. Each name is formatted von-last-first-jr style with
--% format.name$, run through sortify, and appended to the result, with a
--% blank separator before every name but the first. A final name of
--% "others" is rendered as "et al" (not sortified).
--% Uses the variables s, t, nameptr, numnames and namesleft, which are
--% declared outside this section.
--FUNCTION {sort.format.names}
--{ 's :=
-- #1 'nameptr :=
-- ""
-- s num.names$ 'numnames :=
-- numnames 'namesleft :=
-- { namesleft #0 > }
-- { nameptr #1 >
-- { " " * }
-- 'skip$
-- if$
-- s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't :=
-- nameptr numnames = t "others" = and
-- { "et al" * }
-- { t sortify * }
-- if$
-- nameptr #1 + 'nameptr :=
-- namesleft #1 - 'namesleft :=
-- }
-- while$
--}
--
--% sort.format.title: turn the title on top of the stack into a sort
--% string. The three (word, length) pairs are stacked first so that the
--% successive chop.word calls try "The ", then "An ", then "A " as a
--% leading article to strip. The result is sortified and cut to at most
--% global.max$ characters.
--FUNCTION {sort.format.title}
--{ 't :=
-- "A " #2
-- "An " #3
-- "The " #4 t chop.word
-- chop.word
-- chop.word
-- sortify
-- #1 global.max$ substring$
--}
--
--% author.sort: push the name part of the sort key. Uses the author list
--% when present, else the sortified key field; with neither, warns and
--% pushes "".
--FUNCTION {author.sort}
--{ author empty$
-- { key empty$
-- { "to sort, need author or key in " cite$ * warning$
-- ""
-- }
-- { key sortify }
-- if$
-- }
-- { author sort.format.names }
-- if$
--}
--
--% author.editor.sort: push the name part of the sort key, trying author,
--% then editor, then the sortified key field; with none of them, warns
--% and pushes "".
--FUNCTION {author.editor.sort}
--{ author empty$
-- { editor empty$
-- { key empty$
-- { "to sort, need author, editor, or key in " cite$ * warning$
-- ""
-- }
-- { key sortify }
-- if$
-- }
-- { editor sort.format.names }
-- if$
-- }
-- { author sort.format.names }
-- if$
--}
--
--% author.organization.sort: push the name part of the sort key, trying
--% author, then organization (with a leading "The " chopped off and the
--% rest sortified), then the sortified key field; with none of them,
--% warns and pushes "".
--FUNCTION {author.organization.sort}
--{ author empty$
-- { organization empty$
-- { key empty$
-- { "to sort, need author, organization, or key in " cite$ * warning$
-- ""
-- }
-- { key sortify }
-- if$
-- }
-- { "The " #4 organization chop.word sortify }
-- if$
-- }
-- { author sort.format.names }
-- if$
--}
--
--% editor.organization.sort: push the name part of the sort key, trying
--% editor, then organization (with a leading "The " chopped off and the
--% rest sortified), then the sortified key field; with none of them,
--% warns and pushes "".
--FUNCTION {editor.organization.sort}
--{ editor empty$
-- { organization empty$
-- { key empty$
-- { "to sort, need editor, organization, or key in " cite$ * warning$
-- ""
-- }
-- { key sortify }
-- if$
-- }
-- { "The " #4 organization chop.word sortify }
-- if$
-- }
-- { editor sort.format.names }
-- if$
--}
--
--% presort: compute sort.key$ for the current entry.
--% The name part depends on the entry type:
--%   book, inbook -> author.editor.sort
--%   proceedings  -> editor.organization.sort
--%   manual       -> author.organization.sort
--%   anything else-> author.sort
--% The key is then  <names> <blank> <sortified year> <blank> <sort title>,
--% truncated to entry.max$ characters.
--FUNCTION {presort}
--{ type$ "book" =
-- type$ "inbook" =
-- or
-- 'author.editor.sort
-- { type$ "proceedings" =
-- 'editor.organization.sort
-- { type$ "manual" =
-- 'author.organization.sort
-- 'author.sort
-- if$
-- }
-- if$
-- }
-- if$
-- " "
-- *
-- year field.or.null sortify
-- *
-- " "
-- *
-- title field.or.null
-- sort.format.title
-- *
-- #1 entry.max$ substring$
-- 'sort.key$ :=
--}
--
--% Compute a sort key for every entry, then sort the entries by it.
--ITERATE {presort}
--
--SORT
--
--% State for the label pass: the widest label seen so far, its width,
--% and the running number assigned to the next entry.
--STRINGS { longest.label }
--
--INTEGERS { number.label longest.label.width }
--
--% initialize.longest.label: reset that state; numbering starts at 1.
--FUNCTION {initialize.longest.label}
--{ "" 'longest.label :=
-- #1 'number.label :=
-- #0 'longest.label.width :=
--}
--
--% longest.label.pass: assign the current entry its numeric label (the
--% running number as a string), advance the counter, and remember the
--% label if it is wider (per width$) than any seen so far.
--% The label variable is declared outside this section.
--FUNCTION {longest.label.pass}
--{ number.label int.to.str$ 'label :=
-- number.label #1 + 'number.label :=
-- label width$ longest.label.width >
-- { label 'longest.label :=
-- label width$ 'longest.label.width :=
-- }
-- 'skip$
-- if$
--}
--
--% Number the (already sorted) entries and find the widest label.
--EXECUTE {initialize.longest.label}
--
--ITERATE {longest.label.pass}
--
--% begin.bib: write the database preamble (if any) and open the
--% thebibliography environment, passing the widest label as its argument.
--% This style additionally sets \itemsep to -1ex and switches to \small
--% immediately after the \begin.
--FUNCTION {begin.bib}
--{ preamble$ empty$
-- 'skip$
-- { preamble$ write$ newline$ }
-- if$
-- "\begin{thebibliography}{" longest.label *
-- "}\setlength{\itemsep}{-1ex}\small" * write$ newline$
--}
--
--% Main output pass: open the bibliography, run init.state.consts
--% (defined outside this section), then call each entry's type function.
--EXECUTE {begin.bib}
--
--EXECUTE {init.state.consts}
--
--ITERATE {call.type$}
--
--% end.bib: close the thebibliography environment.
--FUNCTION {end.bib}
--{ newline$
-- "\end{thebibliography}" write$ newline$
--}
--
--% Run it once, after all entries have been written.
--EXECUTE {end.bib}
--
--% end of file ieee.bst
--% ---------------------------------------------------------------
+++ /dev/null
--#FIG 3.2
--Landscape
--Center
--Inches
--Letter
--100.00
--Single
---2
--1200 2
--6 150 1650 900 2025
--4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001
--4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001
---6
--6 150 150 900 525
--4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001
--4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001
---6
--6 2550 4125 3150 4725
--4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001
--4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001
--4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001
---6
--6 1050 1575 1950 1875
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575
--4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001
---6
--6 5400 1575 6300 2175
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575
--4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001
---6
--6 5400 2400 6300 3000
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400
--4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001
---6
--6 1050 2400 1950 2700
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400
--4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001
---6
--6 1050 825 1950 1125
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1050 825 1950 825 1950 1125 1050 1125 1050 825
--4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001
---6
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1500 1125 1500 1575
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3225 2025 4050 3375
--2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
-- 150 675 6600 675
--2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
-- 150 1350 6600 1350
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3225 4500 4050 3675
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3225 1725 5400 1725
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3225 2550 5400 2550
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3225 2850 4050 3450
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1500 1800 1500 2400
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2400 825 3300 825 3300 1275 2400 1275 2400 825
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1500 2625 1500 4125
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125
--2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1500 300 1500 825
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1875 975 2400 975
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1875 1725 2400 1725
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1875 2550 2400 2550
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 1875 4275 2400 4275
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300
--4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001
--4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001
--4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001
--4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001
--4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001
--4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001
--4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001
--4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001
--4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001
--4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001
--4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001
--4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001
--4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001
--4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001
--4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001
+++ /dev/null
--#FIG 3.2
--Landscape
--Center
--Inches
--Letter
--100.00
--Single
---2
--1200 2
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1350 900 1650 900 1650 1200 1350 1200 1350 900
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800
--2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
-- 4200 375 4200 2100
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 525 600 1125 600 1125 2100 525 2100 525 600
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200
--2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3000 1425 4425 1425
--2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-- 3600 825 3750 825 3750 1125 3600 1125 3600 825
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 2025 1425 2550 1425
--2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
-- 4425 750 4875 750 4875 1125 4425 1125 4425 750
--2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 3675 975 4425 975
--3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2
-- 0 0 1.00 60.00 120.00
-- 825 1050 1350 1050
-- 0.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
-- 0 0 1.00 60.00 120.00
-- 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
-- 0 0 1.00 60.00 120.00
-- 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950
-- 0.000 1.000 1.000 1.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
-- 525 975 1125 975
-- 0.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
-- 525 1125 1125 1125
-- 0.000 0.000
--3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7
-- 0 0 1.00 60.00 120.00
-- 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975
-- 3600 975
-- 0.000 1.000 1.000 1.000 1.000 1.000 0.000
--4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001
--4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001
--4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001
--4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001
--4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001
--4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001
--4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001
--4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001
--4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001
+++ /dev/null
--@Article{ Cplant,
-- title = { {M}assively {P}arallel {C}omputing with
-- {C}ommodity {C}omponents },
-- author = { Ron Brightwell and David S. Greenberg and Arthur
-- B. Maccabe and Rolf Riesen },
-- journal = { Parallel Computing },
-- volume = { 26 },
-- month = { February },
-- pages = { 243-266 },
-- year = { 2000 }
--}
--
--@Manual{ Portals,
-- organization = { Sandia National Laboratories },
-- title = { {P}uma {P}ortals },
-- note = { http://www.cs.sandia.gov/puma/portals },
-- year = { 1997 }
--}
--
--@Techreport{ VIA,
-- title = { {V}irtual {I}nterface {A}rchitecture
-- {S}pecification {V}ersion 1.0 },
-- author = { {Compaq, Microsoft, and Intel} },
-- institution = { Compaq, Microsoft, and Intel },
-- month = { December },
-- year = { 1997 }
--}
--
--@Techreport{ ST,
-- title = { {I}nformation {T}echnology - {S}cheduled
-- {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 },
-- author = { {Task Group of Technical Committee T11} },
-- institution = { Accredited Standards Committee NCITS },
-- month = { July },
-- year = { 1998 }
--}
--
--@Manual{ TFLOPS,
-- organization = { Sandia National Laboratories },
-- title = { ASCI Red },
-- note = { http://www.sandia.gov/ASCI/TFLOP },
-- year = { 1996 }
--}
--
--@Techreport{ GM,
-- title = { The {GM} {M}essage {P}assing {S}ystem },
-- author = { {Myricom, Inc.} },
-- institution = { {Myricom, Inc.} },
-- year = { 1997 },
--}
--
--@Article{ MPIstandard,
-- title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard },
-- author = { {Message Passing Interface Forum} },
-- journal = { The International Journal of Supercomputer Applications
-- and High Performance Computing },
-- volume = { 8 },
-- year = { 1994 }
--}
--
--@Inproceedings{ PumaOS,
-- author = "Lance Shuler and Chu Jong and Rolf Riesen and
-- David van Dresser and Arthur B. Maccabe and
-- Lee Ann Fisk and T. Mack Stallcup",
-- booktitle = "Proceedings of the 1995 Intel Supercomputer
-- User's Group Conference",
-- title = "The {P}uma Operating System for Massively Parallel Computers",
-- organization = "Intel Supercomputer User's Group",
-- year = 1995
--}
--
--@InProceedings{ SUNMOS,
--author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and
-- Stephen R. Wheat",
--title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide",
--booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994
-- Annual North America Users' Conference.",
--year = 1994,
--pages = "245--251",
--month = "June",
--location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps"
--}
--
--@InProceedings { PumaMPI,
-- title = { Design and Implementation of {MPI} on {P}uma Portals },
-- author = { Ron Brightwell and Lance Shuler },
-- booktitle = { Proceedings of the Second MPI Developer's Conference },
-- pages = { 18-25 },
-- month = { July },
-- year = { 1996 }
--}
--
--@Inproceedings{ FM2,
-- author = { Mario Lauria and Scott Pakin and Andrew Chien },
-- title = { {E}fficient {L}ayering for {H}igh {S}peed
-- {C}ommunication: {F}ast {M}essages 2.x },
-- Booktitle = { Proceedings of the IEEE International Symposium
-- on High Performance Distributed Computing },
-- year = { 1998 }
--}
--
--@Manual { CraySHMEM,
-- title = "SHMEM Technical Note for C, SG-2516 2.3",
-- organization = "Cray Research, Inc.",
-- month = "October",
-- year = 1994
--}
--
--@Manual { MPI2,
-- title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface",
-- organization = "Message Passing Interface Forum",
-- note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html",
-- month = "July",
-- year = 1997
--}
--
--@InProceedings { PMMPI,
-- title = { {The Design and Implementation of Zero Copy MPI Using
-- Commodity Hardware with a High Performance Network} },
-- author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori
-- and Yutaka Ishikawa },
-- booktitle = { Proceedings of the ICS },
-- year = { 1998 }
--}
+++ /dev/null
--#LyX 1.2 created this file. For more info see http://www.lyx.org/
--\lyxformat 220
--\textclass report
--\begin_preamble
--\usepackage{fullpage}
--\renewenvironment{comment}%
--{\begin{quote}\textbf{Discussion}: \slshape}%
--{\end{quote}}
--\pagestyle{myheadings}
--\end_preamble
--\language american
--\inputencoding auto
--\fontscheme pslatex
--\graphics default
--\paperfontsize 10
--\spacing single
--\papersize letterpaper
--\paperpackage a4
--\use_geometry 0
--\use_amsmath 0
--\use_natbib 0
--\use_numerical_citations 0
--\paperorientation portrait
--\secnumdepth 2
--\tocdepth 2
--\paragraph_separation indent
--\defskip medskip
--\quotes_language english
--\quotes_times 2
--\papercolumns 1
--\papersides 2
--\paperpagestyle headings
--
--\layout Title
--
--The Portals 3.2 Message Passing Interface
--\newline
-- Revision 1.1
--\layout Author
--
--Ron Brightwell
--\begin_inset Foot
--collapsed true
--
--\layout Standard
--
--R.
-- Brightwell and R.
-- Riesen are with the Scalable Computing Systems Department, Sandia National
-- Laboratories, P.O.
-- Box 5800, Albuquerque, NM\SpecialChar ~
--\SpecialChar ~
--87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov.
--\end_inset
--
--, Arthur B.
-- Maccabe
--\begin_inset Foot
--collapsed true
--
--\layout Standard
--
--A.
-- B.
-- Maccabe is with the Computer Science Department, University of New Mexico,
-- Albuquerque, NM\SpecialChar ~
--\SpecialChar ~
--87131-1386, maccabe@cs.unm.edu.
--\end_inset
--
--, Rolf Riesen and Trammell Hudson
--\layout Abstract
--
--This report presents a specification for the Portals 3.2 message passing
-- interface.
-- Portals 3.2 is intended to allow scalable, high-performance network communicatio
--n between nodes of a parallel computing system.
-- Specifically, it is designed to support a parallel computing platform composed
-- of clusters of commodity workstations connected by a commodity system area
-- network fabric.
-- In addition, Portals 3.2 is well suited to massively parallel processing
-- and embedded systems.
-- Portals 3.2 represents an adaption of the data movement layer developed
-- for massively parallel processing platforms, such as the 4500-node Intel
-- TeraFLOPS machine.
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--clearpage
--\backslash
--pagenumbering{roman}
--\backslash
--setcounter{page}{3}
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset LatexCommand \tableofcontents{}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--cleardoublepage
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset FloatList figure
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--cleardoublepage
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset FloatList table
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--cleardoublepage
--\end_inset
--
--
--\layout Chapter*
--
--Summary of Changes for Revision 1.1
--\layout Enumerate
--
--Updated version number to 3.2 throughout the document
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sub:PtlGetId}
--
--\end_inset
--
--: added
--\family typewriter
--PTL_SEGV
--\family default
-- to error list for
--\shape italic
--PtlGetId
--\shape default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--: added
--\family typewriter
--PTL_ML_TOOLONG
--\family default
-- to error list for
--\shape italic
--PtlMEAttach
--\shape default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:meunlink}
--
--\end_inset
--
--: removed text referring to a list of associated memory descriptors.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:mdfree}
--
--\end_inset
--
--: added text to describe unlinking a free-floating memory descriptor.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:types}
--
--\end_inset
--
--: added entry for
--\family typewriter
--ptl_seq_t
--\family default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--:
--\begin_deeper
--\layout Enumerate
--
--added definition of
--\family typewriter
--max_offset
--\family default
--.
--\layout Enumerate
--
--added text to clarify
--\family typewriter
--PTL_MD_MANAGE_REMOTE
--\family default
--.
--\end_deeper
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:mdattach}
--
--\end_inset
--
--: modified text for
--\family typewriter
--unlink_op
--\family default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:niinit}
--
--\end_inset
--
--: added text to clarify multiple calls to
--\shape italic
--PtlNIInit
--\shape default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:mdattach}
--
--\end_inset
--
--: added text to clarify
--\family typewriter
--unlink_nofit
--\family default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:receiving}
--
--\end_inset
--
--: removed text indicating that an MD will reject a message if the associated
-- EQ is full.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:mdfree}
--
--\end_inset
--
--: added
--\family typewriter
--PTL_MD_INUSE
--\family default
-- error code and text to indicate that only MDs with no pending operations
-- can be unlinked.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:retcodes}
--
--\end_inset
--
--: added
--\family typewriter
--PTL_MD_INUSE
--\family default
-- return code.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:event-type}
--
--\end_inset
--
--: added user id field, MD handle field, and NI specific failure field to
-- the
--\family typewriter
--ptl_event_t
--\family default
-- structure.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:types}
--
--\end_inset
--
--: added
--\family typewriter
--ptl_ni_fail_t
--\family default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:event-type}
--
--\end_inset
--
--: added
--\family typewriter
--PTL_EVENT_UNLINK
--\family default
-- event type.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:func}
--
--\end_inset
--
--: removed
--\shape slanted
--PtlTransId
--\shape default
--.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--, Section
--\begin_inset LatexCommand \ref{sec:meinsert}
--
--\end_inset
--
--, Section
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--: listed allowable constants with relevant fields.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:func}
--
--\end_inset
--
--: added
--\shape italic
--PtlMEAttachAny
--\shape default
-- function.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:retcodes}
--
--\end_inset
--
--: added
--\family typewriter
--PTL_PT_FULL
--\family default
-- return code for
--\shape italic
--PtlMEAttachAny
--\shape default
--.
--\layout Enumerate
--
--Table
--\begin_inset LatexCommand \ref{tab:oconsts}
--
--\end_inset
--
--: updated to reflect new event types.
--\layout Enumerate
--
--Section
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--: added
--\family typewriter
--ptl_nid_t
--\family default
--,
--\family typewriter
--ptl_pid_t
--\family default
--, and
--\family typewriter
--ptl_uid_t
--\family default
--.
--\layout Chapter*
--
--Summary of Changes for Version 3.1
--\layout Section*
--
--Thread Issues
--\layout Standard
--
--The most significant change to the interface from version 3.0 to 3.1 involves
-- the clarification of how the interface interacts with multi-threaded applicatio
--ns.
-- We adopted a generic thread model in which processes define an address
-- space and threads share the address space.
-- Consideration of the API in the light of threads led to several clarifications
-- throughout the document:
--\layout Enumerate
--
--Glossary:
--\begin_deeper
--\layout Enumerate
--
--added a definition for
--\emph on
--thread
--\emph default
--,
--\layout Enumerate
--
--reworded the definition for
--\emph on
--process
--\emph default
--.
--
--\end_deeper
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:apiover}
--
--\end_inset
--
--: added section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:threads}
--
--\end_inset
--
-- to describe the multi-threading model used by the Portals API.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:ptlinit}
--
--\end_inset
--
--:
--\emph on
--PtlInit
--\emph default
-- must be called at least once and may be called any number of times.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:ptlfini}
--
--\end_inset
--
--:
--\emph on
--PtlFini
--\emph default
-- should be called once as the process is terminating and not as each thread
-- terminates.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:pid}
--
--\end_inset
--
--: Portals does not define thread ids.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--: network interfaces are associated with processes, not threads.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:niinit}
--
--\end_inset
--
--:
--\emph on
--PtlNIInit
--\emph default
-- must be called at least once and may be called any number of times.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:eqget}
--
--\end_inset
--
--:
--\emph on
--PtlEQGet
--\emph default
-- returns
--\family typewriter
--PTL_EQ_EMPTY
--\family default
-- if a thread is blocked on
--\emph on
--PtlEQWait
--\emph default
--.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:eqwait}
--
--\end_inset
--
--: waiting threads are awakened in FIFO order.
--
--\layout Standard
--
--Two functions,
--\emph on
--PtlNIBarrier
--\emph default
-- and
--\emph on
--PtlEQCount
--\emph default
-- were removed from the API.
--
--\emph on
--PtlNIBarrier
--\emph default
-- was defined to block the calling process until all of the processes in
-- the application group had invoked
--\emph on
--PtlNIBarrier
--\emph default
--.
-- We now consider this functionality, along with the concept of groups (see
-- the discussion under
--\begin_inset Quotes eld
--\end_inset
--
--other changes
--\begin_inset Quotes erd
--\end_inset
--
--), to be part of the runtime system, not part of the Portals API.
--
--\emph on
--PtlEQCount
--\emph default
-- was defined to return the number of events in an event queue.
-- Because external operations may lead to new events being added and other
-- threads may remove events, the value returned by
--\emph on
--PtlEQCount
--\emph default
-- would have to be a hint about the number of events in the event queue.
--\layout Section*
--
--Handling small, unexpected messages
--\layout Standard
--
--Another set of changes relates to handling small unexpected messages in
-- MPI.
-- In designing version 3.0, we assumed that each unexpected message would
-- be placed in a unique memory descriptor.
-- To avoid the need to process a long list of memory descriptors, we moved
-- the memory descriptors out of the match list and hung them off of a single
-- match list entry.
-- In this way, large unexpected messages would only encounter a single
--\begin_inset Quotes eld
--\end_inset
--
--short message
--\begin_inset Quotes erd
--\end_inset
--
-- match list entry before encountering the
--\begin_inset Quotes eld
--\end_inset
--
--long message
--\begin_inset Quotes erd
--\end_inset
--
-- match list entry.
-- Experience with this strategy identified resource management problems with
-- this approach.
-- In particular, a long sequence of very short (or zero length) messages
-- could quickly exhaust the memory descriptors constructed for handling unexpecte
--d messages.
-- Our new strategy involves the use of several very large memory descriptors
-- for small unexpected messages.
-- Consecutive unexpected messages will be written into the first of these
-- memory descriptors until the memory descriptor fills up.
-- When the first of the
--\begin_inset Quotes eld
--\end_inset
--
--small memory
--\begin_inset Quotes erd
--\end_inset
--
-- descriptors fills up, it will be unlinked and subsequent short messages
-- will be written into the next
--\begin_inset Quotes eld
--\end_inset
--
--short message
--\begin_inset Quotes erd
--\end_inset
--
-- memory descriptor.
-- In this case, a
--\begin_inset Quotes eld
--\end_inset
--
--short message
--\begin_inset Quotes erd
--\end_inset
--
-- memory descriptor will be declared full when it does not have sufficient
-- space for the largest small unexpected message.
--\layout Standard
--
--This led to two significant changes.
-- First, each match list entry now has a single memory descriptor rather
-- than a list of memory descriptors.
-- Second, in addition to exceeding the operation threshold, a memory descriptor
-- can be unlinked when the local offset exceeds a specified value.
-- These changes have led to several changes in this document:
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{subsec:paddress}
--
--\end_inset
--
--:
--\begin_deeper
--\layout Enumerate
--
--removed references to the memory descriptor list,
--\layout Enumerate
--
--changed the portals address translation description to indicate that unlinking
-- a memory descriptor implies unlinking the associated match list entry--match
-- list entries can no longer be unlinked independently from the memory descriptor.
--
--\end_deeper
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--:
--\begin_deeper
--\layout Enumerate
--
--removed unlink from argument list,
--\layout Enumerate
--
--removed description of
--\family typewriter
--ptl_unlink
--\family default
-- type,
--\layout Enumerate
--
--changed wording of the error condition when the Portal table index already
-- has an associated match list.
--
--\end_deeper
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:meinsert}
--
--\end_inset
--
--: removed unlink from argument list.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--: added
--\family typewriter
--max_offset
--\family default
--.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:mdattach}
--
--\end_inset
--
--:
--\begin_deeper
--\layout Enumerate
--
--added description of
--\family typewriter
--ptl_unlink
--\family default
-- type,
--\layout Enumerate
--
--removed reference to memory descriptor lists,
--\layout Enumerate
--
--changed wording of the error condition when match list entry already has
-- an associated memory descriptor,
--\layout Enumerate
--
--changed the description of the
--\family typewriter
--unlink
--\family default
-- argument.
--
--\end_deeper
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--: removed
--\family typewriter
--PtlMDInsert
--\family default
-- operation.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:mdbind}
--
--\end_inset
--
--: removed references to memory descriptor list.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:mdfree}
--
--\end_inset
--
--: removed reference to memory descriptor list.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:summary}
--
--\end_inset
--
--: removed references to PtlMDInsert.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:semantics}
--
--\end_inset
--
--: removed reference to memory descriptor list.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:exmpi}
--
--\end_inset
--
--: revised the MPI example to reflect the changes to the interface.
--
--\layout Standard
--
--Several changes have been made to improve the general documentation of the
-- interface.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--: documented the special value
--\family typewriter
--PTL_EQ_NONE
--\family default
--.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--: documented the special value
--\family typewriter
--PTL_ID_ANY
--\family default
--.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:mdbind}
--
--\end_inset
--
--: documented the return value
--\family typewriter
--PTL_INV_EQ
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:mdupdate}
--
--\end_inset
--
--: clarified the description of the
--\emph on
--PtlMDUpdate
--\emph default
-- function.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:implvals}
--
--\end_inset
--
--: introduced a new section to document the implementation defined values.
--
--\layout Enumerate
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:summary}
--
--\end_inset
--
--: modified Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:oconsts}
--
--\end_inset
--
-- to indicate where each constant is introduced and where it is used.
--
--\layout Section*
--
--Other changes
--\layout Subsection*
--
--Implementation defined limits (Section
--\begin_inset LatexCommand \ref{sec:niinit}
--
--\end_inset
--
--)
--\layout Standard
--
--The earlier version provided implementation defined limits for the maximum
-- number of match entries, the maximum number of memory descriptors, etc.
-- Rather than spanning the entire implementation, these limits are now associated
-- with individual network interfaces.
--\layout Subsection*
--
--Added User Ids (Section
--\begin_inset LatexCommand \ref{sec:uid}
--
--\end_inset
--
--)
--\layout Standard
--
--Group Ids had been used to simplify access control entries.
-- In particular, a process could allow access for all of the processes in
-- a group.
-- User Ids have been introduced to regain this functionality.
-- We use user ids to fill this role.
--\layout Subsection*
--
--Removed Group Ids and Rank Ids (Section
--\begin_inset LatexCommand \ref{sec:pid}
--
--\end_inset
--
--)
--\layout Standard
--
--The earlier version of Portals had two forms for addressing processes: <node
-- id, process id> and <group id, rank id>.
-- A process group was defined as the collection of processes created during
-- application launch.
-- Each process in the group was given a unique rank id in the range 0 to
--
--\begin_inset Formula $n-1$
--\end_inset
--
-- where
--\begin_inset Formula $n$
--\end_inset
--
-- was the number of processes in the group.
-- We removed groups because they are better handled in the runtime system.
--\layout Subsection*
--
--Match lists (Section
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--)
--\layout Standard
--
--It is no longer illegal to have an existing match entry when calling PtlMEAttach.
-- A position argument was added to the list of arguments supplied to
--\emph on
--PtlMEAttach
--\emph default
-- to specify whether the new match entry is prepended or appended to the
-- existing list.
-- If there is no existing match list, the position argument is ignored.
--\layout Subsection*
--
--Unlinking Memory Descriptors (Section
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--)
--\layout Standard
--
--Previously, a memory descriptor could be unlinked if the offset exceeded
-- a threshold upon the completion of an operation.
-- In this version, the unlinking is delayed until there is a matching operation
-- which requires more memory than is currently available in the descriptor.
-- In addition to changes in section, this led to a revision of Figure\SpecialChar ~
--
--\begin_inset LatexCommand \ref{fig:flow}
--
--\end_inset
--
--.
--\layout Subsection*
--
--Split Phase Operations and Events (Section
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--)
--\layout Standard
--
--Previously, there were five types of events:
--\family typewriter
--PTL_EVENT_PUT
--\family default
--,
--\family typewriter
--PTL_EVENT_GET
--\family default
--,
--\family typewriter
--PTL_EVENT_REPLY
--\family default
--,
--\family typewriter
--PTL_EVENT_SENT
--\family default
--, and
--\family typewriter
--PTL_EVENT_ACK.
--
--\family default
--The first four of these reflected the completion of potentially long operations.
-- We have introduced new event types to reflect the fact that long operations
-- have a distinct starting point and a distinct completion point.
-- Moreover, the completion may be successful or unsuccessful.
--\layout Standard
--
--In addition to providing a mechanism for reporting failure to higher levels
-- of software, this split provides an opportunity for improved ordering
-- semantics.
-- Previously, if one process initiated two operations (e.g., two put operations)
-- on a remote process, these operations were guaranteed to complete in the
-- same order that they were initiated.
-- Now, we only guarantee that the initiation events are delivered in the
-- same order.
-- In particular, the operations do not need to complete in the order that
-- they were initiated.
--\layout Subsection*
--
--Well known process ids (Section
--\begin_inset LatexCommand \ref{sec:niinit}
--
--\end_inset
--
--)
--\layout Standard
--
--To support the notion of
--\begin_inset Quotes eld
--\end_inset
--
--well known process ids,
--\begin_inset Quotes erd
--\end_inset
--
-- we added a process id argument to the arguments for PtlNIInit.
--\layout Chapter*
--
--Glossary
--\layout Description
--
--API Application Programming Interface.
-- A definition of the functions and semantics provided by a library of functions.
--
--\layout Description
--
--Initiator A
--\emph on
--process
--\emph default
-- that initiates a message operation.
--
--\layout Description
--
--Message An application-defined unit of data that is exchanged between
--\emph on
--processes
--\emph default
--.
--
--\layout Description
--
--Message\SpecialChar ~
--Operation Either a put operation, which writes data, or a get operation,
-- which reads data.
--
--\layout Description
--
--Network A network provides point-to-point communication between
--\emph on
--nodes
--\emph default
--.
-- Internally, a network may provide multiple routes between endpoints (to
-- improve fault tolerance or to improve performance characteristics); however,
-- multiple paths will not be exposed outside of the network.
--
--\layout Description
--
--Node A node is an endpoint in a
--\emph on
--network
--\emph default
--.
-- Nodes provide processing capabilities and memory.
-- A node may provide multiple processors (an SMP node) or it may act as a
--
--\emph on
--gateway
--\emph default
-- between networks.
--
--\layout Description
--
--Process A context of execution.
-- A process defines a virtual memory (VM) context.
-- This context is not shared with other processes.
-- Several threads may share the VM context defined by a process.
--
--\layout Description
--
--Target A
--\emph on
--process
--\emph default
-- that is acted upon by a message operation.
--
--\layout Description
--
--Thread A context of execution that shares a VM context with other threads.
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--cleardoublepage
--\layout Standard
--
--\backslash
--setcounter{page}{1}
--\backslash
--pagenumbering{arabic}
--\end_inset
--
--
--\layout Chapter
--
--Introduction
--\begin_inset LatexCommand \label{sec:intro}
--
--\end_inset
--
--
--\layout Section
--
--Overview
--\layout Standard
--
--This document describes an application programming interface for message
-- passing between nodes in a system area network.
-- The goal of this interface is to improve the scalability and performance
-- of network communication by defining the functions and semantics of message
-- passing required for scaling a parallel computing system to ten thousand
-- nodes.
-- This goal is achieved by providing an interface that will allow a quality
-- implementation to take advantage of the inherently scalable design of Portals.
--\layout Standard
--
--This document is divided into several sections:
--\layout Description
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:intro}
--
--\end_inset
--
-----Introduction This section describes the purpose and scope of the Portals
-- API.
--
--\layout Description
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:apiover}
--
--\end_inset
--
-----An\SpecialChar ~
--Overview\SpecialChar ~
--of\SpecialChar ~
--the\SpecialChar ~
--Portals\SpecialChar ~
--3.1\SpecialChar ~
--API This section gives a brief overview of the
-- Portals API.
-- The goal is to introduce the key concepts and terminology used in the descripti
--on of the API.
--
--\layout Description
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:api}
--
--\end_inset
--
-----The\SpecialChar ~
--Portals\SpecialChar ~
--3.2\SpecialChar ~
--API This section describes the functions and semantics of
-- the Portals application programming interface.
--
--\layout Description
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:semantics}
--
--\end_inset
--
-----The\SpecialChar ~
--Semantics\SpecialChar ~
--of\SpecialChar ~
--Message\SpecialChar ~
--Transmission This section describes the semantics
-- of message transmission.
-- In particular, the information transmitted in each type of message and
-- the processing of incoming messages.
--
--\layout Description
--
--Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:examples}
--
--\end_inset
--
-----Examples This section presents several examples intended to illustrate
-- the use of the Portals API.
--
--\layout Section
--
--Purpose
--\layout Standard
--
--Existing message passing technologies available for commodity cluster networking
-- hardware do not meet the scalability goals required by the Cplant\SpecialChar ~
--
--\begin_inset LatexCommand \cite{Cplant}
--
--\end_inset
--
-- project at Sandia National Laboratories.
-- The goal of the Cplant project is to construct a commodity cluster that
-- can scale to the order of ten thousand nodes.
-- This number greatly exceeds the capacity for which existing message passing
-- technologies have been designed and implemented.
--\layout Standard
--
--In addition to the scalability requirements of the network, these technologies
-- must also be able to support a scalable implementation of the Message Passing
-- Interface (MPI)\SpecialChar ~
--
--\begin_inset LatexCommand \cite{MPIstandard}
--
--\end_inset
--
-- standard, which has become the
--\shape italic
--de facto
--\shape default
-- standard for parallel scientific computing.
-- While MPI does not impose any scalability limitations, existing message
-- passing technologies do not provide the functionality needed to allow implement
--ations of MPI to meet the scalability requirements of Cplant.
--\layout Standard
--
--The following are properties of a network architecture that do not impose
-- any inherent scalability limitations:
--\layout Itemize
--
--Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~
--
--\begin_inset LatexCommand \cite{VIA}
--
--\end_inset
--
-- and TCP/IP sockets, have limitations on the number of peer connections
-- that can be established.
--
--\layout Itemize
--
--Network independence - Many communication systems depend on the host processor
-- to perform operations in order for messages in the network to be consumed.
-- Message consumption from the network should not be dependent on host processor
-- activity, such as the operating system scheduler or user-level thread scheduler.
--
--\layout Itemize
--
--User-level flow control - Many communication systems manage flow control
-- internally to avoid depleting resources, which can significantly impact
-- performance as the number of communicating processes increases.
--
--\layout Itemize
--
--OS Bypass - High performance network communication should not involve memory
-- copies into or out of a kernel-managed protocol stack.
--
--\layout Standard
--
--The following are properties of a network architecture that do not impose
-- scalability limitations for an implementation of MPI:
--\layout Itemize
--
--Receiver-managed - Sender-managed message passing implementations require
-- a persistent block of memory to be available for every process, requiring
-- memory resources to increase with job size and requiring user-level flow
-- control mechanisms to manage these resources.
--
--\layout Itemize
--
--User-level Bypass - While OS Bypass is necessary for high-performance, it
-- alone is not sufficient to support the Progress Rule of MPI asynchronous
-- operations.
--
--\layout Itemize
--
--Unexpected messages - Few communication systems have support for receiving
-- messages for which there is no prior notification.
-- Support for these types of messages is necessary to avoid flow control
-- and protocol overhead.
--
--\layout Section
--
--Background
--\layout Standard
--
--Portals was originally designed for and implemented on the nCube machine
-- as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~
--
--\begin_inset LatexCommand \cite{SUNMOS}
--
--\end_inset
--
-- and Puma\SpecialChar ~
--
--\begin_inset LatexCommand \cite{PumaOS}
--
--\end_inset
--
-- lightweight kernel development projects.
-- Portals went through two design phases, the latter of which is used on
-- the 4500-node Intel TeraFLOPS machine\SpecialChar ~
--
--\begin_inset LatexCommand \cite{TFLOPS}
--
--\end_inset
--
--.
-- Portals have been very successful in meeting the needs of such a large
-- machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~
--
--\begin_inset LatexCommand \cite{PumaMPI}
--
--\end_inset
--
--, but also for implementing the scalable run-time environment and parallel
-- I/O capabilities of the machine.
--\layout Standard
--
--The second generation Portals implementation was designed to take full advantage
-- of the hardware architecture of large MPP machines.
-- However, efforts to implement this same design on commodity cluster technology
-- identified several limitations, due to the differences in network hardware
-- as well as to shortcomings in the design of Portals.
--\layout Section
--
--Scalability
--\layout Standard
--
--The primary goal in the design of Portals is scalability.
-- Portals are designed specifically for an implementation capable of supporting
-- a parallel job running on tens of thousands of nodes.
-- Performance is critical only in terms of scalability.
-- That is, the level of message passing performance is characterized by how
-- far it allows an application to scale and not by how it performs in micro-bench
--marks (e.g., a two node bandwidth or latency test).
--\layout Standard
--
--The Portals API is designed to allow for scalability, not to guarantee it.
-- Portals cannot overcome the shortcomings of a poorly designed application
-- program.
-- Applications that have inherent scalability limitations, either through
-- design or implementation, will not be transformed by Portals into scalable
-- applications.
-- Scalability must be addressed at all levels.
-- Portals do not inhibit scalability, but do not guarantee it either.
--\layout Standard
--
--To support scalability, the Portals interface maintains a minimal amount
-- of state.
-- Portals provide reliable, ordered delivery of messages between pairs of
-- processes.
-- They are connectionless: a process is not required to explicitly establish
-- a point-to-point connection with another process in order to communicate.
-- Moreover, all buffers used in the transmission of messages are maintained
-- in user space.
-- The target process determines how to respond to incoming messages, and
-- messages for which there are no buffers are discarded.
--\layout Section
--
--Communication Model
--\layout Standard
--
--Portals combine the characteristics of both one-sided and two-sided communication.
-- They define a
--\begin_inset Quotes eld
--\end_inset
--
--matching put
--\begin_inset Quotes erd
--\end_inset
--
-- operation and a
--\begin_inset Quotes eld
--\end_inset
--
--matching get
--\begin_inset Quotes erd
--\end_inset
--
-- operation.
-- The destination of a put (or send) is not an explicit address; instead,
-- each message contains a set of match bits that allow the receiver to determine
-- where incoming messages should be placed.
-- This flexibility allows Portals to support both traditional one-sided operation
--s and two-sided send/receive operations.
--\layout Standard
--
--Portals allows the target to determine whether incoming messages are acceptable.
-- A target process can choose to accept message operations from any specific
-- process or can choose to ignore message operations from any specific process.
--\layout Section
--
--Zero Copy, OS Bypass and Application Bypass
--\layout Standard
--
--In traditional system architectures, network packets arrive at the network
-- interface card (NIC), are passed through one or more protocol layers in
-- the operating system, and eventually copied into the address space of the
-- application.
-- As network bandwidth began to approach memory copy rates, reduction of
-- memory copies became a critical concern.
-- This concern led to the development of zero-copy message passing protocols
-- in which message copies are eliminated or pipelined to avoid the loss of
-- bandwidth.
--\layout Standard
--
--A typical zero-copy protocol has the NIC generate an interrupt for the CPU
-- when a message arrives from the network.
-- The interrupt handler then controls the transfer of the incoming message
-- into the address space of the appropriate application.
-- The interrupt latency, the time from the initiation of an interrupt until
-- the interrupt handler is running, is fairly significant.
-- To avoid this cost, some modern NICs have processors that can be programmed
-- to implement part of a message passing protocol.
-- Given a properly designed protocol, it is possible to program the NIC to
-- control the transfer of incoming messages, without needing to interrupt
-- the CPU.
-- Because this strategy does not need to involve the OS on every message
-- transfer, it is frequently called
--\begin_inset Quotes eld
--\end_inset
--
--OS Bypass.
--\begin_inset Quotes erd
--\end_inset
--
-- ST\SpecialChar ~
--
--\begin_inset LatexCommand \cite{ST}
--
--\end_inset
--
--, VIA\SpecialChar ~
--
--\begin_inset LatexCommand \cite{VIA}
--
--\end_inset
--
--, FM\SpecialChar ~
--
--\begin_inset LatexCommand \cite{FM2}
--
--\end_inset
--
--, GM\SpecialChar ~
--
--\begin_inset LatexCommand \cite{GM}
--
--\end_inset
--
--, and Portals are examples of OS Bypass protocols.
--\layout Standard
--
--Many protocols that support OS Bypass still require that the application
-- actively participate in the protocol to ensure progress.
-- As an example, the long message protocol of PM requires that the application
-- receive and reply to a request to put or get a long message.
-- This complicates the runtime environment, requiring a thread to process
-- incoming requests, and significantly increases the latency required to
-- initiate a long message protocol.
-- The Portals message passing protocol does not require activity on the part
-- of the application to ensure progress.
-- We use the term
--\begin_inset Quotes eld
--\end_inset
--
--Application Bypass
--\begin_inset Quotes erd
--\end_inset
--
-- to refer to this aspect of the Portals protocol.
--\layout Section
--
--Faults
--\layout Standard
--
--Given the number of components that we are dealing with and the fact that
-- we are interested in supporting applications that run for very long times,
-- failures are inevitable.
-- The Portals API recognizes that the underlying transport may not be able
-- to successfully complete an operation once it has been initiated.
-- This is reflected in the fact that the Portals API reports three types
-- of events: events indicating the initiation of an operation, events indicating
-- the successful completion of an operation, and events indicating the unsuccessf
--ul completion of an operation.
-- Every initiation event is eventually followed by a successful completion
-- event or an unsuccessful completion event.
--\layout Standard
--
--Between the time an operation is started and the time that the operation
-- completes (successfully or unsuccessfully), any memory associated with
-- the operation should be considered volatile.
-- That is, the memory may be changed in unpredictable ways while the operation
-- is progressing.
-- Once the operation completes, the memory associated with the operation
-- will not be subject to further modification (from this operation).
-- Notice that unsuccessful operations may alter memory in an essentially
-- unpredictable fashion.
--\layout Chapter
--
--An Overview of the Portals API
--\begin_inset LatexCommand \label{sec:apiover}
--
--\end_inset
--
--
--\layout Standard
--
--In this section, we give a conceptual overview of the Portals API.
-- The goal is to provide a context for understanding the detailed description
-- of the API presented in the next section.
--\layout Section
--
--Data Movement
--\begin_inset LatexCommand \label{sec:dmsemantics}
--
--\end_inset
--
--
--\layout Standard
--
--A Portal represents an opening in the address space of a process.
-- Other processes can use a Portal to read (get) or write (put) the memory
-- associated with the portal.
-- Every data movement operation involves two processes, the
--\series bold
--initiator
--\series default
-- and the
--\series bold
--target
--\series default
--.
-- The initiator is the process that initiates the data movement operation.
-- The target is the process that responds to the operation by either accepting
-- the data for a put operation, or replying with the data for a get operation.
--\layout Standard
--
--In this discussion, activities attributed to a process may refer to activities
-- that are actually performed by the process or
--\emph on
--on behalf of the process
--\emph default
--.
-- The inclusiveness of our terminology is important in the context of
--\emph on
--application bypass
--\emph default
--.
-- In particular, when we note that the target sends a reply in the case of
-- a get operation, it is possible that the reply will be generated by another
-- component in the system, bypassing the application.
--\layout Standard
--
--Figures\SpecialChar ~
--
--\begin_inset LatexCommand \ref{fig:put}
--
--\end_inset
--
-- and
--\begin_inset LatexCommand \ref{fig:get}
--
--\end_inset
--
-- present graphical interpretations of the Portal data movement operations:
-- put and get.
-- In the case of a put operation, the initiator sends a put request message
-- containing the data to the target.
-- The target translates the Portal addressing information in the request
-- using its local Portal structures.
-- When the request has been processed, the target optionally sends an acknowledge
--ment message.
--\layout Standard
--
--
--\begin_inset Float figure
--placement htbp
--wide false
--collapsed false
--
--\layout Standard
--\align center
--
--\begin_inset Graphics FormatVersion 1
-- filename put.eps
-- display color
-- size_type 0
-- rotateOrigin center
-- lyxsize_type 1
-- lyxwidth 218pt
-- lyxheight 119pt
--\end_inset
--
--
--\layout Caption
--
--Portal Put (Send)
--\begin_inset LatexCommand \label{fig:put}
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--In the case of a get operation, the initiator sends a get request to the
-- target.
-- As with the put operation, the target translates the Portal addressing
-- information in the request using its local Portal structures.
-- Once it has translated the Portal addressing information, the target sends
-- a reply that includes the requested data.
--\layout Standard
--
--
--\begin_inset Float figure
--placement htbp
--wide false
--collapsed false
--
--\layout Standard
--\align center
--
--\begin_inset Graphics FormatVersion 1
-- filename get.eps
-- display color
-- size_type 0
-- rotateOrigin center
-- lyxsize_type 1
-- lyxwidth 218pt
-- lyxheight 119pt
--\end_inset
--
--
--\layout Caption
--
--Portal Get
--\begin_inset LatexCommand \label{fig:get}
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--We should note that Portal address translations are only performed on nodes
-- that respond to operations initiated by other nodes.
-- Acknowledgements and replies to get operations bypass the portals address
-- translation structures.
--\layout Section
--
--Portal Addressing
--\begin_inset LatexCommand \label{subsec:paddress}
--
--\end_inset
--
--
--\layout Standard
--
--One-sided data movement models (e.g., shmem\SpecialChar ~
--
--\begin_inset LatexCommand \cite{CraySHMEM}
--
--\end_inset
--
--, ST\SpecialChar ~
--
--\begin_inset LatexCommand \cite{ST}
--
--\end_inset
--
--, MPI-2\SpecialChar ~
--
--\begin_inset LatexCommand \cite{MPI2}
--
--\end_inset
--
--) typically use a triple to address memory on a remote node.
-- This triple consists of a process id, memory buffer id, and offset.
-- The process id identifies the target process, the memory buffer id specifies
-- the region of memory to be used for the operation, and the offset specifies
-- an offset within the memory buffer.
--\layout Standard
--
--In addition to the standard address components (process id, memory buffer
-- id, and offset), a Portal address includes a set of match bits.
-- This addressing model is appropriate for supporting one-sided operations
-- as well as traditional two-sided message passing operations.
-- Specifically, the Portals API provides the flexibility needed for an efficient
-- implementation of MPI-1, which defines two-sided operations with one-sided
-- completion semantics.
--\layout Standard
--
--Figure\SpecialChar ~
--
--\begin_inset LatexCommand \ref{fig:portals}
--
--\end_inset
--
-- presents a graphical representation of the structures used by a target
-- in the interpretation of a Portal address.
-- The process id is used to route the message to the appropriate node and
-- is not reflected in this diagram.
-- The memory buffer id, called the
--\series bold
--portal id
--\series default
--, is used as an index into the Portal table.
-- Each element of the Portal table identifies a match list.
-- Each element of the match list specifies two bit patterns: a set of
--\begin_inset Quotes eld
--\end_inset
--
--don't care
--\begin_inset Quotes erd
--\end_inset
--
-- bits, and a set of
--\begin_inset Quotes eld
--\end_inset
--
--must match
--\begin_inset Quotes erd
--\end_inset
--
-- bits.
-- In addition to the two sets of match bits, each match list element has
-- at most one memory descriptor.
-- Each memory descriptor identifies a memory region and an optional event
-- queue.
-- The memory region specifies the memory to be used in the operation and
-- the event queue is used to record information about these operations.
--\layout Standard
--
--
--\begin_inset Float figure
--placement htbp
--wide false
--collapsed false
--
--\layout Standard
--\align center
--
--\begin_inset Graphics FormatVersion 1
-- filename portals.eps
-- display color
-- size_type 0
-- rotateOrigin center
-- lyxsize_type 1
-- lyxwidth 305pt
-- lyxheight 106pt
--\end_inset
--
--
--\layout Caption
--
--Portal Addressing Structures
--\begin_inset LatexCommand \label{fig:portals}
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Figure\SpecialChar ~
--
--\begin_inset LatexCommand \ref{fig:flow}
--
--\end_inset
--
-- illustrates the steps involved in translating a Portal address, starting
-- from the first element in a match list.
-- If the match criteria specified in the match list entry are met and the
-- memory descriptor list accepts the operation
--\begin_inset Foot
--collapsed true
--
--\layout Standard
--
--Memory descriptors can reject operations because a threshold has been exceeded
-- or because the memory region does not have sufficient space, see Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--
--\end_inset
--
--, the operation (put or get) is performed using the memory region specified
-- in the memory descriptor.
-- If the memory descriptor specifies that it is to be unlinked when a threshold
-- has been exceeded, the match list entry is removed from the match list
-- and the resources associated with the memory descriptor and match list
-- entry are reclaimed.
-- Finally, if there is an event queue specified in the memory descriptor,
-- the operation is logged in the event queue.
--\layout Standard
--
--
--\begin_inset Float figure
--placement htbp
--wide false
--collapsed false
--
--\layout Standard
--\align center
--
--\begin_inset Graphics FormatVersion 1
-- filename flow_new.eps
-- display color
-- size_type 0
-- rotateOrigin center
-- lyxsize_type 1
-- lyxwidth 447pt
-- lyxheight 282pt
--\end_inset
--
--
--\layout Caption
--
--Portals Address Translation
--\begin_inset LatexCommand \label{fig:flow}
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--If the match criteria specified in the match list entry are not met, or
-- there is no memory descriptor associated with the match list entry, or
-- the memory descriptor associated with the match list entry rejects the
-- operation, the address translation continues with the next match list entry.
-- If the end of the match list has been reached, the address translation
-- is aborted and the incoming request is discarded.
--\layout Section
--
--Access Control
--\layout Standard
--
--A process can control access to its portals using an access control list.
-- Each entry in the access control list specifies a process id and a Portal
-- table index.
-- The access control list is actually an array of entries.
-- Each incoming request includes an index into the access control list (i.e.,
-- a
--\begin_inset Quotes eld
--\end_inset
--
--cookie
--\begin_inset Quotes erd
--\end_inset
--
-- or hint).
-- If the id of the process issuing the request doesn't match the id specified
-- in the access control list entry or the Portal table index specified in
-- the request doesn't match the Portal table index specified in the access
-- control list entry, the request is rejected.
-- Process identifiers and Portal table indexes may include wild card values
-- to increase the flexibility of this mechanism.
--
--\layout Standard
--
--Two aspects of this design merit further discussion.
-- First, the model assumes that the information in a message header, the
-- sender's id in particular, is trustworthy.
-- In most contexts, we assume that the entity that constructs the header
-- is trustworthy; however, using cryptographic techniques, we could easily
-- devise a protocol that would ensure the authenticity of the sender.
--\layout Standard
--
--Second, because the access check is performed by the receiver, it is possible
-- that a malicious process will generate thousands of messages that will
-- be denied by the receiver.
-- This could saturate the network and/or the receiver, resulting in a
--\emph on
--denial of service
--\emph default
-- attack.
-- Moving the check to the sender, using capabilities, would remove the potential
-- for this form of attack.
-- However, the solution introduces the complexities of capability management
-- (exchange of capabilities, revocation, protections, etc).
--\layout Section
--
--Multi-threaded Applications
--\begin_inset LatexCommand \label{sec:threads}
--
--\end_inset
--
--
--\layout Standard
--
--The Portals API supports a generic view of multi-threaded applications.
-- From the perspective of the Portals API, an application program is defined
-- by a set of processes.
-- Each process defines a unique address space.
-- The Portals API defines access to this address space from other processes
-- (using portals addressing and the data movement operations).
-- A process may have one or more
--\emph on
--threads
--\emph default
-- executing in its address space.
--
--\layout Standard
--
--With the exception of
--\emph on
--PtlEQWait
--\emph default
-- every function in the Portals API is non-blocking and atomic with respect
-- to both other threads and external operations that result from data movement
-- operations.
-- While individual operations are atomic, sequences of these operations may
-- be interleaved between different threads and with external operations.
-- The Portals API does not provide any mechanisms to control this interleaving.
-- It is expected that these mechanisms will be provided by the API used to
-- create threads.
--\layout Chapter
--
--The Portals API
--\begin_inset LatexCommand \label{sec:api}
--
--\end_inset
--
--
--\layout Section
--
--Naming Conventions
--\begin_inset LatexCommand \label{sec:conv}
--
--\end_inset
--
--
--\layout Standard
--
--The Portals API defines two types of entities: functions and types.
-- Function names always start with
--\emph on
--Ptl
--\emph default
-- and use mixed upper and lower case.
-- When used in the body of this report, function names appear in italic face,
-- e.g.,
--\emph on
--PtlInit
--\emph default
--.
-- The functions associated with an object type will have names that start
-- with
--\emph on
--Ptl
--\emph default
--, followed by the two-letter object type code shown in Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:objcodes}
--
--\end_inset
--
--.
-- As an example, the function
--\emph on
--PtlEQAlloc
--\emph default
-- allocates resources for an event queue.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Object Type Codes
--\begin_inset LatexCommand \label{tab:objcodes}
--
--\end_inset
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\newline
--
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="5" columns="3">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\emph on
--xx
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Name
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Section
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--EQ
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Event Queue
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- MD
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Memory Descriptor
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- ME
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Match list Entry
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:me}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- NI
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Network Interface
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Type names use lower case with underscores to separate words.
-- Each type name starts with
--\family typewriter
--ptl
--\family default
--_ and ends with
--\family typewriter
--_t
--\family default
--.
-- When used in the body of this report, type names appear in a fixed font,
-- e.g.,
--\family typewriter
--ptl_match_bits_t
--\family default
--.
--\layout Standard
--
--Names for constants use upper case with underscores to separate words.
-- Each constant name starts with
--\family typewriter
--PTL_
--\family default
--.
-- When used in the body of this report, constant names appear in a fixed font,
-- e.g.,
--\family typewriter
--PTL_OK
--\family default
--.
--\layout Section
--
--Base Types
--\layout Standard
--
--The Portals API defines a variety of base types.
-- These types represent a simple renaming of the base types provided by the
-- C programming language.
-- In most cases these new type names have been introduced to improve type
-- safety and to avoid issues arising from differences in representation sizes
-- (e.g., 16-bit or 32-bit integers).
--\layout Subsection
--
--Sizes
--\begin_inset LatexCommand \label{sec:size-t}
--
--\end_inset
--
--
--\layout Standard
--
--The type
--\family typewriter
--ptl_size_t
--\family default
-- is an unsigned 64-bit integral type used for representing sizes.
--\layout Subsection
--
--Handles
--\begin_inset LatexCommand \label{sec:handle-type}
--
--\end_inset
--
--
--\layout Standard
--
--Objects maintained by the API are accessed through handles.
-- Handle types have names of the form
--\family typewriter
--ptl_handle_
--\emph on
--xx
--\emph default
--_t
--\family default
--, where
--\emph on
--xx
--\emph default
-- is one of the two-letter object type codes shown in Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:objcodes}
--
--\end_inset
--
--.
-- For example, the type
--\family typewriter
--ptl_handle_ni_t
--\family default
-- is used for network interface handles.
--\layout Standard
--
--Each type of object is given a unique handle type to enhance type checking.
-- The type,
--\family typewriter
--ptl_handle_any_t
--\family default
--, can be used when a generic handle is needed.
-- Every handle value can be converted into a value of type
--\family typewriter
--ptl_handle_any_t
--\family default
-- without loss of information.
--\layout Standard
--
--Handles are not simple values.
-- Every portals object is associated with a specific network interface and
-- an identifier for this interface (along with an object identifier) is part
-- of the handle for the object.
--\layout Standard
--
--The special value
--\family typewriter
--PTL_EQ_NONE
--\family default
--, of type
--\family typewriter
--ptl_handle_eq_t
--\family default
--, is used to indicate the absence of an event queue.
-- See sections
--\begin_inset LatexCommand \ref{sec:mdfree}
--
--\end_inset
--
-- and\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:mdupdate}
--
--\end_inset
--
-- for uses of this value.
--\layout Subsection
--
--Indexes
--\begin_inset LatexCommand \label{sec:index-type}
--
--\end_inset
--
--
--\layout Standard
--
--The types
--\family typewriter
--ptl_pt_index_t
--\family default
-- and
--\family typewriter
--ptl_ac_index_t
--\family default
-- are integral types used for representing Portal table indexes and access
-- control table indexes, respectively.
-- See section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:niinit}
--
--\end_inset
--
-- for limits on values of these types.
--\layout Subsection
--
--Match Bits
--\begin_inset LatexCommand \label{sec:mb-type}
--
--\end_inset
--
--
--\layout Standard
--
--The type
--\family typewriter
--ptl_match_bits_t
--\family default
-- is capable of holding unsigned 64-bit integer values.
--\layout Subsection
--
--Network Interfaces
--\begin_inset LatexCommand \label{sec:ni-type}
--
--\end_inset
--
--
--\layout Standard
--
--The type
--\family typewriter
--ptl_interface_t
--\family default
-- is an integral type used for identifying different network interfaces.
-- Users will need to consult the local documentation to determine appropriate
-- values for the interfaces available.
-- The special value
--\family typewriter
--PTL_IFACE_DEFAULT
--\family default
-- identifies the default interface.
--\layout Subsection
--
--Identifiers
--\begin_inset LatexCommand \label{sec:id-type}
--
--\end_inset
--
--
--\layout Standard
--
--The type
--\family typewriter
--ptl_nid_t
--\family default
-- is an integral type used for representing node ids
--\family typewriter
--, ptl_pid_t
--\family default
-- is an integral type for representing process ids, and
--\family typewriter
--ptl_uid_t
--\family default
-- is an integral type for representing user ids.
--\layout Standard
--
--The special value
--\family typewriter
--PTL_PID_ANY
--\family default
-- matches any process identifier, PTL_NID_ANY matches any node identifier,
-- and
--\family typewriter
--PTL_UID_ANY
--\family default
-- matches any user identifier.
-- See sections
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
-- and\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:acentry}
--
--\end_inset
--
-- for uses of these values.
--\layout Subsection
--
--Status Registers
--\begin_inset LatexCommand \label{sec:stat-type}
--
--\end_inset
--
--
--\layout Standard
--
--Each network interface maintains an array of status registers that can be
-- accessed using the
--\family typewriter
--PtlNIStatus
--\family default
-- function (see Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:nistatus}
--
--\end_inset
--
--).
-- The type
--\family typewriter
--ptl_sr_index_t
--\family default
-- defines the types of indexes that can be used to access the status registers.
-- The only index defined for all implementations is
--\family typewriter
--PTL_SR_DROP_COUNT
--\family default
-- which identifies the status register that counts the dropped requests for
-- the interface.
-- Other indexes (and registers) may be defined by the implementation.
--\layout Standard
--
--The type
--\family typewriter
--ptl_sr_value_t
--\family default
-- defines the types of values held in status registers.
-- This is a signed integer type.
-- The size is implementation dependent, but must be at least 32 bits.
--\layout Section
--
--Initialization and Cleanup
--\begin_inset LatexCommand \label{sec:init}
--
--\end_inset
--
--
--\layout Standard
--
--The Portals API includes a function,
--\emph on
--PtlInit
--\emph default
--, to initialize the library and a function,
--\emph on
--PtlFini
--\emph default
--, to clean up after the application is done using the library.
--\layout Subsection
--
--PtlInit
--\begin_inset LatexCommand \label{sec:ptlinit}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlInit( int *max_interfaces );
--\layout Standard
--\noindent
--The
--\emph on
--PtlInit
--\emph default
-- function initializes the Portals library.
-- PtlInit must be called at least once by a process before any thread makes
-- a Portals function call, but may be safely called more than once.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_FAIL Indicates an error during initialization.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--max_interfaces
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="1" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="5in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--max_interfaces
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the maximum number of interfaces
-- that can be initialized.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlFini
--\begin_inset LatexCommand \label{sec:ptlfini}
--
--\end_inset
--
--
--\layout LyX-Code
--
--void PtlFini( void );
--\layout Standard
--\noindent
--The
--\emph on
--PtlFini
--\emph default
-- function cleans up after the Portals library is no longer needed by a process.
-- After this function is called, calls to any of the functions defined by
-- the Portal API or use of the structures set up by the Portals API will
-- result in undefined behavior.
-- This function should be called once and only once during termination by
-- a process.
-- Typically, this function will be called in the exit sequence of a process.
-- Individual threads should not call PtlFini when they terminate.
--\layout Section
--
--Network Interfaces
--\begin_inset LatexCommand \label{sec:ni}
--
--\end_inset
--
--
--\layout Standard
--
--The Portals API supports the use of multiple network interfaces.
-- However, each interface is treated as an independent entity.
-- Combining interfaces (e.g.,
--\begin_inset Quotes eld
--\end_inset
--
--bonding
--\begin_inset Quotes erd
--\end_inset
--
-- to create a higher bandwidth connection) must be implemented by the application
-- or embedded in the underlying network.
-- Interfaces are treated as independent entities to make it easier to cache
-- information on individual network interface cards.
--\layout Standard
--
--Once initialized, each interface provides a Portal table, an access control
-- table, and a collection of status registers.
-- See Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:me}
--
--\end_inset
--
-- for a discussion of updating Portal table entries using the
--\emph on
--PtlMEAttach
--\emph default
-- function.
-- See Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:ac}
--
--\end_inset
--
-- for a discussion of the initialization and updating of entries in the access
-- control table.
-- See Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:nistatus}
--
--\end_inset
--
-- for a discussion of the
--\emph on
--PtlNIStatus
--\emph default
-- function which can be used to determine the value of a status register.
--\layout Standard
--
--Every other type of Portal object (e.g., memory descriptor, event queue, or
-- match list entry) is associated with a specific network interface.
-- The association to a network interface is established when the object is
-- created and is encoded in the handle for the object.
--\layout Standard
--
--Each network interface is initialized and shut down independently.
-- The initialization routine,
--\emph on
--PtlNIInit
--\emph default
--, returns a handle for an interface object which is used in all subsequent
-- Portal operations.
-- The
--\emph on
--PtlNIFini
--\emph default
-- function is used to shut down an interface and release any resources that
-- are associated with the interface.
-- Network interface handles are associated with processes, not threads.
-- All threads in a process share all of the network interface handles.
--\layout Standard
--
--The Portals API also defines the
--\emph on
--PtlNIStatus
--\emph default
-- function to query the status registers for a network interface, the
--\emph on
--PtlNIDist
--\emph default
-- function to determine the
--\begin_inset Quotes eld
--\end_inset
--
--distance
--\begin_inset Quotes erd
--\end_inset
--
-- to another process, and the
--\emph on
--PtlNIHandle
--\emph default
-- function to determine the network interface that an object is associated
-- with.
--\layout Subsection
--
--PtlNIInit
--\begin_inset LatexCommand \label{sec:niinit}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef struct {
--\newline
-- int max_match_entries;
--\newline
-- int max_mem_descriptors;
--\newline
-- int max_event_queues;
--\newline
-- ptl_ac_index_t max_atable_index;
--\newline
-- ptl_pt_index_t max_ptable_index;
--\newline
--} ptl_ni_limits_t;
--\newline
--
--\newline
--int PtlNIInit( ptl_interface_t interface,
--\newline
-- ptl_pid_t pid,
--\newline
-- ptl_ni_limits_t* desired,
--\newline
-- ptl_ni_limits_t* actual,
--\newline
-- ptl_handle_ni_t* handle );
--\layout Standard
--
--Values of type
--\family typewriter
--ptl_ni_limits_t
--\family default
-- include the following members:
--\layout Description
--
--max_match_entries Maximum number of match entries that can be allocated
-- at any one time.
--\layout Description
--
--max_mem_descriptors Maximum number of memory descriptors that can be allocated
-- at any one time.
--\layout Description
--
--max_event_queues Maximum number of event queues that can be allocated at
-- any one time.
--\layout Description
--
--max_atable_index Largest access control table index for this interface,
-- valid indexes range from zero to
--\family typewriter
--max_atable_index
--\family default
--, inclusive.
--\layout Description
--
--max_ptable_index Largest Portal table index for this interface, valid indexes
-- range from zero to
--\family typewriter
--max_ptable_index
--\family default
--, inclusive.
--\layout Standard
--\noindent
--The
--\emph on
--PtlNIInit
--\emph default
-- function is used to initialize the Portals API for a network interface.
-- This function must be called at least once by each process before any other
-- operations that apply to the interface by any process or thread.
-- For subsequent calls to
--\shape italic
--PtlNIInit
--\shape default
-- from within the same process (either by different threads or the same thread),
-- the desired limits will be ignored and the call will return the existing
-- NI handle.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INIT_DUP Indicates a duplicate initialization of
--\family typewriter
--interface
--\family default
--.
--
--\layout Description
--
--PTL_INIT_INV Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to initialize the
-- interface.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--pid
--\family default
-- is not a valid process id.
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--actual
--\family default
-- or
--\family typewriter
-- handle
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="5" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Identifies the network interface to be initialized.
-- (See section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:ni-type}
--
--\end_inset
--
-- for a discussion of values used to identify network interfaces.)
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--pid
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Identifies the desired process id (for well known process ids).
-- The value
--\family typewriter
--PTL_PID_ANY
--\family default
-- may be used to have the process id assigned by the underlying library.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--desired
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--If non-NULL, points to a structure that holds the desired limits.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--actual
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, the location pointed to by actual will hold the actual
-- limits.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the interface.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Comment
--
--The use of desired is implementation dependent.
-- In particular, an implementation may choose to ignore this argument.
--\layout Subsection
--
--PtlNIFini
--\begin_inset LatexCommand \label{sec:nifini}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlNIFini( ptl_handle_ni_t interface );
--\layout Standard
--\noindent
--The
--\emph on
--PtlNIFini
--\emph default
-- function is used to release the resources allocated for a network interface.
-- Once the
--\emph on
--PtlNIFini
--\emph default
-- operation has been started, the results of pending API operations (e.g.,
-- operations initiated by another thread) for this interface are undefined.
-- Similarly, the effects of incoming operations (puts and gets) or return
-- values (acknowledgements and replies) for this interface are undefined.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="1" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--A handle for the interface to shut down.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlNIStatus
--\begin_inset LatexCommand \label{sec:nistatus}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlNIStatus( ptl_handle_ni_t interface,
--\newline
-- ptl_sr_index_t status_register,
--\newline
-- ptl_sr_value_t* status );
--\layout Standard
--\noindent
--The
--\emph on
--PtlNIStatus
--\emph default
-- function returns the value of a status register for the specified interface.
-- (See section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:stat-type}
--
--\end_inset
--
-- for more information on status register indexes and status register values.)
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_INV_SR_INDX Indicates that
--\family typewriter
--status_register
--\family default
-- is not a valid status register.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--status
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="3" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the interface to use.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--status_register
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--An index for the status register to read.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--status
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the current value of the status
-- register.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Comment
--
--The only status register that must be defined is a drop count register (
--\family typewriter
--PTL_SR_DROP_COUNT
--\family default
--).
-- Implementations may define additional status registers.
-- Identifiers for the indexes associated with these registers should start
-- with the prefix
--\family typewriter
--PTL_SR_
--\family default
--.
--\layout Subsection
--
--PtlNIDist
--\layout LyX-Code
--
--int PtlNIDist( ptl_handle_ni_t interface,
--\newline
-- ptl_process_id_t process,
--\newline
-- unsigned long* distance );
--\layout Standard
--\noindent
--The
--\emph on
--PtlNIDist
--\emph default
-- function returns the distance to another process using the specified interface.
-- Distances are only defined relative to an interface.
-- Distance comparisons between different interfaces on the same process may
-- be meaningless.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--process
--\family default
-- is not a valid process identifier.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--distance
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="3" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the interface to use.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--process
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--An identifier for the process whose distance is being requested.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--distance
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the distance to the remote
-- process.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Comment
--
--This function should return a static measure of distance.
-- Examples include minimum latency, the inverse of available bandwidth, or
-- the number of switches between the two endpoints.
--\layout Subsection
--
--PtlNIHandle
--\layout LyX-Code
--
--int PtlNIHandle( ptl_handle_any_t handle,
--\newline
-- ptl_handle_ni_t* interface );
--\layout Standard
--\noindent
--The
--\emph on
--PtlNIHandle
--\emph default
-- function returns a handle for the network interface with which the object
-- identified by
--\family typewriter
--handle
--\family default
-- is associated.
-- If the object identified by
--\family typewriter
--handle
--\family default
-- is a network interface, this function returns the same value it is passed.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_HANDLE Indicates that
--\family typewriter
--handle
--\family default
-- is not a valid handle.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--interface
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="2" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the object.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the network interface
-- associated with
--\family typewriter
--handle
--\family default
--.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Comment
--
--Every handle should encode the network interface and the object id relative
-- to this handle.
-- Both are presumably encoded using integer values.
--\layout Section
--
--User Identification
--\begin_inset LatexCommand \label{sec:uid}
--
--\end_inset
--
--
--\layout Standard
--
--Every process runs on behalf of a user.
--
--\layout Subsection
--
--PtlGetUid
--\layout LyX-Code
--
--int PtlGetUid( ptl_handle_ni_t ni_handle,
--\newline
-- ptl_uid_t* uid );
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--ni_handle
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--uid
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="2" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="5in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ni_handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A network interface handle.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--uid
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the user id for the calling
-- process.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Comment
--
--Note that user identifiers are dependent on the network interface(s).
-- In particular, if a node has multiple interfaces, a process may have multiple
-- user identifiers.
--\layout Section
--
--Process Identification
--\begin_inset LatexCommand \label{sec:pid}
--
--\end_inset
--
--
--\layout Standard
--
--Processes that use the Portals API can be identified using a node id and
-- process id.
-- Every node accessible through a network interface has a unique node identifier
-- and every process running on a node has a unique process identifier.
-- As such, any process in the computing system can be identified by its node
-- id and process id.
--
--\layout Standard
--
--The Portals API defines a type,
--\family typewriter
--ptl_process_id_t
--\family default
-- for representing process ids and a function,
--\emph on
--PtlGetId
--\emph default
--, which can be used to obtain the id of the current process.
--\layout Comment
--
--The Portals API does not include thread identifiers.
-- Messages are delivered to processes (address spaces) not threads (contexts
-- of execution).
--\layout Subsection
--
--The Process Id Type
--\begin_inset LatexCommand \label{sec:pid-type}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef struct {
--\newline
-- ptl_nid_t nid; /* node id */
--\newline
-- ptl_pid_t pid; /* process id */
--\newline
--} ptl_process_id_t;
--\layout Standard
--\noindent
--The
--\family typewriter
--ptl_process_id_t
--\family default
-- type uses two identifiers to represent a process id: a node id and a process
-- id.
--
--\layout Subsection
--
--PtlGetId
--\begin_inset LatexCommand \label{sub:PtlGetId}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlGetId( ptl_handle_ni_t ni_handle,
--\newline
-- ptl_process_id_t* id );
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--ni_handle
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--id
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="2" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="5in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ni_handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A network interface handle.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--id
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the id for the calling process.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Comment
--
--Note that process identifiers are dependent on the network interface(s).
-- In particular, if a node has multiple interfaces, it may have multiple
-- node identifiers.
--\layout Section
--
--Match List Entries and Match Lists
--\begin_inset LatexCommand \label{sec:me}
--
--\end_inset
--
--
--\layout Standard
--
--A match list is a chain of match list entries.
-- Each match list entry includes a memory descriptor and a set of match criteria.
-- The match criteria can be used to reject incoming requests based on process
-- id or the match bits provided in the request.
-- A match list is created using the
--\emph on
--PtlMEAttach
--\emph default
-- or
--\shape italic
--PtlMEAttachAny
--\shape default
-- functions, which create a match list consisting of a single match list
-- entry, attach the match list to the specified Portal index, and return
-- a handle for the match list entry.
-- Match entries can be dynamically inserted and removed from a match list
-- using the
--\emph on
--PtlMEInsert
--\emph default
-- and
--\emph on
--PtlMEUnlink
--\emph default
-- functions.
--\layout Subsection
--
--PtlMEAttach
--\begin_inset LatexCommand \label{sec:meattach}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t;
--\newline
--
--\layout LyX-Code
--
--typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t;
--\newline
--
--\layout LyX-Code
--
--int PtlMEAttach( ptl_handle_ni_t interface,
--\newline
-- ptl_pt_index_t index,
--\newline
-- ptl_process_id_t matchid,
--\newline
-- ptl_match_bits_t match_bits,
--\newline
-- ptl_match_bits_t ignorebits,
--\newline
-- ptl_unlink_t unlink,
--\newline
-- ptl_ins_pos_t position,
--\newline
-- ptl_handle_me_t* handle );
--\layout Standard
--\noindent
--Values of the type
--\family typewriter
--ptl_ins_pos_t
--\family default
-- are used to control where a new item is inserted.
-- The value
--\family typewriter
--PTL_INS_BEFORE
--\family default
-- is used to insert the new item before the current item or before the head
-- of the list.
-- The value
--\family typewriter
--PTL_INS_AFTER
--\family default
-- is used to insert the new item after the current item or after the last
-- item in the list.
--
--\layout Standard
--
--The
--\emph on
--PtlMEAttach
--\emph default
-- function creates a match list consisting of a single entry and attaches
-- this list to the Portal table for
--\family typewriter
--interface
--\family default
--.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_PTINDEX Indicates that
--\family typewriter
--index
--\family default
-- is not a valid Portal table index.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--matchid
--\family default
-- is not a valid process identifier.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to allocate the
-- match list entry.
--
--\layout Description
--
--PTL_ML_TOOLONG Indicates that the resulting match list is too long.
-- The maximum length for a match list is defined by the interface.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="7" columns="3">
--<features>
--<column alignment="left" valignment="top" width="0.8in">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.75in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the interface to use.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--index
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The Portal table index where the match list should be attached.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--matchid
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Specifies the match criteria for the process id of the requestor.
-- The constants
--\family typewriter
--PTL_PID_ANY
--\family default
-- and
--\family typewriter
--PTL_NID_ANY
--\family default
-- can be used to wildcard either of the ids in the
--\family typewriter
--ptl_process_id_t
--\family default
-- structure.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--match_bits, ignorebits
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Specify the match criteria to apply to the match bits in the incoming request.
-- The
--\family typewriter
--ignorebits
--\family default
-- are used to mask out insignificant bits in the incoming match bits.
-- The resulting bits are then compared to the match list entry's match
-- bits to determine if the incoming request meets the match criteria.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--unlink
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Indicates the match list entry should be unlinked when the last memory descripto
--r associated with this match list entry is unlinked.
-- (Note, the check for unlinking a match entry only occurs when a memory
-- descriptor is unlinked.)
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--position
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Indicates whether the new match entry should be prepended or appended to
-- the existing match list.
-- If there is no existing list, this argument is ignored and the new match
-- entry becomes the only entry in the list.
-- Allowed constants:
--\family typewriter
--PTL_INS_BEFORE
--\family default
--,
--\family typewriter
--PTL_INS_AFTER
--\family default
--.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the newly created
-- match list entry.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlMEAttachAny
--\begin_inset LatexCommand \label{sec:attachany}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMEAttachAny( ptl_handle_ni_t interface,
--\newline
-- ptl_pt_index_t *index,
--\newline
-- ptl_process_id_t matchid,
--\newline
-- ptl_match_bits_t match_bits,
--\newline
-- ptl_match_bits_t ignorebits,
--\newline
-- ptl_unlink_t unlink,
--\newline
-- ptl_handle_me_t* handle );
--\layout Standard
--
--The
--\emph on
--PtlMEAttachAny
--\emph default
-- function creates a match list consisting of a single entry and attaches
-- this list to an unused Portal table entry for
--\family typewriter
--interface
--\family default
--.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--matchid
--\family default
-- is not a valid process identifier.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to allocate the
-- match list entry.
--
--\layout Description
--
--PTL_PT_FULL Indicates that there are no free entries in the Portal table.
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="4" columns="3">
--<features>
--<column alignment="left" valignment="top" width="0.8in">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.75in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the interface to use.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--index
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the Portal index where the
-- match list has been attached.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--matchid, match_bits, ignorebits, unlink
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--See the discussion for
--\shape italic
--PtlMEAttach
--\shape default
--.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the newly created
-- match list entry.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlMEInsert
--\begin_inset LatexCommand \label{sec:meinsert}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMEInsert( ptl_handle_me_t current,
--\newline
-- ptl_process_id_t matchid,
--\newline
-- ptl_match_bits_t match_bits,
--\newline
-- ptl_match_bits_t ignorebits,
--\newline
-- ptl_ins_pos_t position,
--\newline
-- ptl_handle_me_t* handle );
--\layout Standard
--
--The
--\emph on
--PtlMEInsert
--\emph default
-- function creates a new match list entry and inserts this entry into the
-- match list containing
--\family typewriter
--current
--\family default
--.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--matchid
--\family default
-- is not a valid process identifier.
--
--\layout Description
--
--PTL_INV_ME Indicates that
--\family typewriter
--current
--\family default
-- is not a valid match entry handle.
--
--\layout Description
--
--PTL_ML_TOOLONG Indicates that the resulting match list is too long.
-- The maximum length for a match list is defined by the interface.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to allocate the
-- match entry.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="4" columns="3">
--<features>
--<column alignment="left" valignment="top" width="0.8in">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--current
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for a match entry.
-- The new match entry will be inserted immediately before or immediately
-- after this match entry.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--matchid
--\family default
--,
--\family typewriter
--match_bits
--\family default
--,
--\family typewriter
--ignorebits
--\family default
--,
--\family typewriter
--unlink
--\family default
--
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--See the discussion for
--\emph on
--PtlMEAttach
--\emph default
--.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--position
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Indicates whether the new match entry should be inserted before or after
-- the
--\family typewriter
--current
--\family default
-- entry.
-- Allowed constants:
--\family typewriter
--PTL_INS_BEFORE
--\family default
--,
--\family typewriter
--PTL_INS_AFTER
--\family default
--.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--See the discussion for
--\emph on
--PtlMEAttach
--\emph default
--.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlMEUnlink
--\begin_inset LatexCommand \label{sec:meunlink}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMEUnlink( ptl_handle_me_t entry );
--\layout Standard
--\noindent
--The
--\emph on
--PtlMEUnlink
--\emph default
-- function can be used to unlink a match entry from a match list.
-- This operation also releases any resources associated with the match entry
-- (including the associated memory descriptor).
-- It is an error to use the match entry handle after calling
--\emph on
--PtlMEUnlink
--\emph default
--.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_ME Indicates that
--\family typewriter
--entry
--\family default
-- is not a valid match entry handle.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="1" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--entry
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--A handle for the match entry to be unlinked.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Section
--
--Memory Descriptors
--\begin_inset LatexCommand \label{sec:md}
--
--\end_inset
--
--
--\layout Standard
--
--A memory descriptor contains information about a region of an application
-- process' memory and an event queue where information about the operations
-- performed on the memory descriptor is recorded.
-- The Portals API provides two operations to create memory descriptors:
--\emph on
--PtlMDAttach
--\emph default
--, and
--\emph on
--PtlMDBind
--\emph default
--; an operation to update a memory descriptor,
--\emph on
--PtlMDUpdate
--\emph default
--; and an operation to unlink and release the resources associated with a
-- memory descriptor,
--\emph on
--PtlMDUnlink
--\emph default
--.
--\layout Subsection
--
--The Memory Descriptor Type
--\begin_inset LatexCommand \label{sec:md-type}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef struct {
--\newline
-- void* start;
--\newline
-- ptl_size_t length;
--\newline
-- int threshold;
--\newline
-- unsigned int max_offset;
--\newline
-- unsigned int options;
--\newline
-- void* user_ptr;
--\newline
-- ptl_handle_eq_t eventq;
--\newline
--} ptl_md_t;
--\layout Standard
--\noindent
--The
--\family typewriter
--ptl_md_t
--\family default
-- type defines the application view of a memory descriptor.
-- Values of this type are used to initialize and update the memory descriptors.
--\layout Subsubsection
--
--Members
--\layout Description
--
--start,\SpecialChar ~
--length Specify the memory region associated with the memory descriptor.
-- The
--\family typewriter
--start
--\family default
-- member specifies the starting address for the memory region and the
--\family typewriter
--length
--\family default
-- member specifies the length of the region.
-- The
--\family typewriter
--start
--\family default
-- member can be NULL provided that the
--\family typewriter
--length
--\family default
-- member is zero.
-- (Zero length buffers are useful to record events.) There are no alignment
-- restrictions on the starting address or the length of the region; although,
-- unaligned messages may be slower (i.e., lower bandwidth and/or longer latency)
-- on some implementations.
--
--\layout Description
--
--threshold Specifies the maximum number of operations that can be performed
-- on the memory descriptor.
-- An operation is any action that could possibly generate an event (see Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
-- for the different types of events).
-- In the usual case, the threshold value is decremented for each operation
-- on the memory descriptor.
-- When the threshold value is zero, the memory descriptor is
--\emph on
--inactive
--\emph default
--, and does not respond to operations.
-- A memory descriptor can have an initial threshold value of zero to allow
-- for manipulation of an inactive memory descriptor by the local process.
-- A threshold value of
--\family typewriter
--PTL_MD_THRESH_INF
--\family default
-- indicates that there is no bound on the number of operations that may be
-- applied to a memory descriptor.
-- Note that local operations (e.g.,
--\emph on
--PtlMDUpdate
--\emph default
--) are not applied to the threshold count.
--
--\layout Description
--
--max_offset Specifies the maximum local offset of a memory descriptor.
-- When the local offset of a memory descriptor exceeds this maximum, the
-- memory descriptor becomes
--\shape italic
--inactive
--\shape default
-- and does not respond to further operations.
--\layout Description
--
--options Specifies the behavior of the memory descriptor.
-- There are five options that can be selected: enable put operations (yes
-- or no), enable get operations (yes or no), offset management (local or
-- remote), message truncation (yes or no), and acknowledgement (yes or no).
-- Values for this argument can be constructed using a bitwise or of the following
-- values:
--\begin_deeper
--\begin_deeper
--\layout Description
--
--PTL_MD_OP_PUT Specifies that the memory descriptor will respond to
--\emph on
--put
--\emph default
-- operations.
-- By default, memory descriptors reject
--\emph on
--put
--\emph default
-- operations.
--
--\layout Description
--
--PTL_MD_OP_GET Specifies that the memory descriptor will respond to
--\emph on
--get
--\emph default
-- operations.
-- By default, memory descriptors reject
--\emph on
--get
--\emph default
-- operations.
--
--\layout Description
--
--PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory
-- region is provided by the incoming request.
-- By default, the offset is maintained locally.
-- When the offset is maintained locally, the offset is incremented by the
-- length of the request so that the next operation (put and/or get) will
-- access the next part of the memory region.
--\layout Description
--
--PTL_MD_TRUNCATE Specifies that the length provided in the incoming request
-- can be reduced to match the memory available in the region.
-- (The memory available in a memory region is determined by subtracting the
-- offset from the length of the memory region.) By default, if the length
-- in the incoming operation is greater than the amount of memory available,
-- the operation is rejected.
--
--\layout Description
--
--PTL_MD_ACK_DISABLE Specifies that an acknowledgement should
--\emph on
--not
--\emph default
-- be sent for incoming
--\emph on
--put
--\emph default
-- operations, even if requested.
-- By default, acknowledgements are sent for
--\emph on
--put
--\emph default
-- operations that request an acknowledgement.
-- Acknowledgements are never sent for
--\emph on
--get
--\emph default
-- operations.
-- The value sent in the reply serves as an implicit acknowledgement.
--
--\end_deeper
--\layout Standard
--
--
--\series bold
--Note
--\series default
--: It is not considered an error to have a memory descriptor that does not
-- respond to either
--\emph on
--put
--\emph default
-- or
--\emph on
--get
--\emph default
-- operations: Every memory descriptor responds to
--\emph on
--reply
--\emph default
-- operations.
-- Nor is it considered an error to have a memory descriptor that responds
-- to both
--\emph on
--put
--\emph default
-- and
--\emph on
--get
--\emph default
-- operations.
--
--\end_deeper
--\layout Description
--
--user_ptr A user-specified value that is associated with the memory descriptor.
-- The value does not need to be a pointer, but must fit in the space used
-- by a pointer.
-- This value (along with other values) is recorded in events associated with
-- operations on this memory descriptor.
--\begin_inset Foot
--collapsed true
--
--\layout Standard
--
--Tying the memory descriptor to a user-defined value can be useful when multiple
-- memory descriptors share the same event queue or when the memory descriptor
-- needs to be associated with a data structure maintained by the application.
-- For example, an MPI implementation can set the
--\family typewriter
--user_ptr
--\family default
-- argument to the value of an MPI Request.
-- This direct association allows for processing of memory descriptors by
-- the MPI implementation without a table lookup or a search for the appropriate
-- MPI Request.
--\end_inset
--
--
--\layout Description
--
--eventq A handle for the event queue used to log the operations performed
-- on the memory region.
-- If this argument is
--\family typewriter
--PTL_EQ_NONE
--\family default
--, operations performed on this memory descriptor are not logged.
--
--\layout Subsection
--
--PtlMDAttach
--\begin_inset LatexCommand \label{sec:mdattach}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMDAttach( ptl_handle_me_t match,
--\newline
-- ptl_md_t mem_desc,
--\newline
-- ptl_unlink_t unlink_op,
--\newline
-- ptl_unlink_t unlink_nofit,
--\newline
-- ptl_handle_md_t* handle );
--\layout Standard
--\noindent
--Values of the type
--\family typewriter
--ptl_unlink_t
--\family default
-- are used to control whether an item is unlinked from a list.
-- The value
--\family typewriter
--PTL_UNLINK
--\family default
-- enables unlinking.
-- The value
--\family typewriter
--PTL_RETAIN
--\family default
-- disables unlinking.
--\layout Standard
--
--The
--\emph on
--PtlMDAttach
--\emph default
-- operation is used to create a memory descriptor and attach it to a match
-- list entry.
-- An error code is returned if this match list entry already has an associated
-- memory descriptor.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INUSE Indicates that
--\family typewriter
--match
--\family default
-- already has a memory descriptor attached.
--
--\layout Description
--
--PTL_INV_ME Indicates that
--\family typewriter
--match
--\family default
-- is not a valid match entry handle.
--
--\layout Description
--
--PTL_ILL_MD Indicates that
--\family typewriter
--mem_desc
--\family default
-- is not a legal memory descriptor.
-- This may happen because the memory region defined in
--\family typewriter
--mem_desc
--\family default
-- is invalid or because the network interface associated with the
--\family typewriter
--eventq
--\family default
-- in
--\family typewriter
--mem_desc
--\family default
-- is not the same as the network interface associated with
--\family typewriter
--match
--\family default
--.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to allocate the
-- memory descriptor.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="5" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--match
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the match entry that the memory descriptor will be associated
-- with.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Provides initial values for the application visible parts of a memory descriptor.
-- Other than its use for initialization, there is no linkage between this
-- structure and the memory descriptor maintained by the API.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--unlink_op
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A flag to indicate whether the memory descriptor is unlinked when it becomes
-- inactive, either because the operation threshold drops to zero or because
-- the maximum offset has been exceeded.
-- (Note, the check for unlinking a memory descriptor only occurs after
-- the completion of a successful operation.
-- If the threshold is set to zero during initialization or using
--\emph on
--PtlMDUpdate
--\emph default
--, the memory descriptor is
--\series bold
--not
--\series default
-- unlinked.)
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--unlink_nofit
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A flag to indicate whether the memory descriptor is unlinked when the space
-- remaining in the memory descriptor is not sufficient for a matching operation.
-- If an incoming message arrives at a memory descriptor that does
-- not have sufficient space and the
--\series bold
--PTL_MD_TRUNCATE
--\series default
-- option is not specified, the memory descriptor will be unlinked.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the newly created
-- memory descriptor.
-- The
--\family typewriter
--handle
--\family default
-- argument can be NULL, in which case the handle will not be returned.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlMDBind
--\begin_inset LatexCommand \label{sec:mdbind}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMDBind( ptl_handle_ni_t interface,
--\newline
-- ptl_md_t mem_desc,
--\newline
-- ptl_handle_md_t* handle );
--\layout Standard
--\noindent
--The
--\emph on
--PtlMDBind
--\emph default
-- operation is used to create a
--\begin_inset Quotes eld
--\end_inset
--
--free floating
--\begin_inset Quotes erd
--\end_inset
--
-- memory descriptor, i.e., a memory descriptor that is not associated with
-- a match list entry.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_ILL_MD Indicates that
--\family typewriter
--mem_desc
--\family default
-- is not a legal memory descriptor.
-- This may happen because the memory region defined in
--\family typewriter
--mem_desc
--\family default
-- is invalid or because the network interface associated with the
--\family typewriter
--eventq
--\family default
-- in
--\family typewriter
--mem_desc
--\family default
-- is not the same as the network interface,
--\family typewriter
--interface
--\family default
--.
--
--\layout Description
--
--PTL_INV_EQ Indicates that the event queue associated with
--\family typewriter
--mem_desc
--\family default
-- is not valid.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to allocate the
-- memory descriptor.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--handle
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="3" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the network interface with which the memory descriptor will
-- be associated.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Provides initial values for the application visible parts of a memory descriptor.
-- Other than its use for initialization, there is no linkage between this
-- structure and the memory descriptor maintained by the API.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the newly created
-- memory descriptor.
-- The
--\family typewriter
--handle
--\family default
-- argument must be a valid address and cannot be NULL.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlMDUnlink
--\begin_inset LatexCommand \label{sec:mdfree}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMDUnlink( ptl_handle_md_t mem_desc );
--\layout Standard
--\noindent
--The
--\emph on
--PtlMDUnlink
--\emph default
-- function unlinks the memory descriptor from any match list entry it may
-- be linked to and releases the resources associated with a memory descriptor.
-- (This function does not free the memory region associated with the memory
-- descriptor.) This function also releases the resources associated with a
-- floating memory descriptor.
-- Only memory descriptors with no pending operations may be unlinked.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_MD Indicates that
--\family typewriter
--mem_desc
--\family default
-- is not a valid memory descriptor handle.
--\layout Description
--
--PTL_MD_INUSE Indicates that
--\family typewriter
--mem_desc
--\family default
-- has pending operations and cannot be unlinked.
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="1" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the memory descriptor to be released.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlMDUpdate
--\begin_inset LatexCommand \label{sec:mdupdate}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlMDUpdate( ptl_handle_md_t mem_desc,
--\newline
-- ptl_md_t* old_md,
--\newline
-- ptl_md_t* new_md,
--\newline
-- ptl_handle_eq_t testq );
--\layout Standard
--\noindent
--The
--\emph on
--PtlMDUpdate
--\emph default
-- function provides a conditional, atomic update operation for memory descriptors.
-- The memory descriptor identified by
--\family typewriter
--mem_desc
--\family default
-- is only updated if the event queue identified by
--\family typewriter
--testq
--\family default
-- is empty.
-- The intent is to only enable updates to the memory descriptor when no new
-- messages have arrived since the last time the queue was checked.
-- See section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:exmpi}
--
--\end_inset
--
-- for an example of how this function can be used.
--\layout Standard
--
--If
--\family typewriter
--new_md
--\family default
-- is not NULL, the memory descriptor identified by
--\family typewriter
--mem_desc
--\family default
-- will be updated
-- to reflect the values in the structure pointed to by
--\family typewriter
--new_md
--\family default
-- if
--\family typewriter
--testq
--\family default
-- has the value
--\family typewriter
--PTL_EQ_NONE
--\family default
-- or if the event queue identified by
--\family typewriter
--testq
--\family default
-- is empty.
-- If
--\family typewriter
--old_md
--\family default
-- is not NULL, the current value of the memory descriptor identified by
--\family typewriter
--mem_desc
--\family default
-- is recorded in the location identified by
--\family typewriter
--old_md
--\family default
--.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_NOUPDATE Indicates that the update was not performed because
--\family typewriter
--testq
--\family default
-- was not empty.
--
--\layout Description
--
--PTL_INV_MD Indicates that
--\family typewriter
--mem_desc
--\family default
-- is not a valid memory descriptor handle.
--
--\layout Description
--
--PTL_ILL_MD Indicates that the value pointed to by
--\family typewriter
--new_md
--\family default
-- is not a legal memory descriptor (e.g., the memory region specified by the
-- memory descriptor may be invalid).
--
--\layout Description
--
--PTL_INV_EQ Indicates that
--\family typewriter
--testq
--\family default
-- is not a valid event queue handle.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--new_md
--\family default
-- or
--\family typewriter
--old_md
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="4" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the memory descriptor to update.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--old_md
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--If
--\family typewriter
--old_md
--\family default
-- is not the value
--\family typewriter
--NULL
--\family default
--, the current value of the memory descriptor will be stored in the location
-- identified by
--\family typewriter
--old_md
--\family default
--.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--new_md
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--If
--\family typewriter
--new_md
--\family default
-- is not the value
--\family typewriter
--NULL
--\family default
--, this argument provides the new values for the memory descriptor, if the
-- update is performed.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--testq
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for an event queue used to predicate the update.
-- If
--\family typewriter
--testq
--\family default
-- is equal to
--\family typewriter
--PTL_EQ_NONE
--\family default
--, the update is performed unconditionally.
-- Otherwise, the update is performed if and only if
--\family typewriter
--testq
--\family default
-- is empty.
-- If the update is not performed, the function returns the value
--\family typewriter
--PTL_NOUPDATE
--\family default
--.
-- (Note, the
--\family typewriter
--testq
--\family default
-- argument does not need to be the same as the event queue associated with
-- the memory descriptor.)
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Standard
--
--The conditional update can be used to ensure that the memory descriptor
-- has not changed between the time it was examined and the time it is updated.
-- In particular, it is needed to support an MPI implementation where the
-- activity of searching an unexpected message queue and posting a receive
-- must be atomic.
--\layout Section
--
--Events and Event Queues
--\begin_inset LatexCommand \label{sec:eq}
--
--\end_inset
--
--
--\layout Standard
--
--Event queues are used to log operations performed on memory descriptors.
-- They can also be used to hold acknowledgements for completed
--\emph on
--put
--\emph default
-- operations and to note when the data specified in a
--\emph on
--put
--\emph default
-- operation has been sent (i.e., when it is safe to reuse the buffer that holds
-- this data).
-- Multiple memory descriptors can share a single event queue.
--\layout Standard
--
--In addition to the
--\family typewriter
--ptl_handle_eq_t
--\family default
-- type, the Portals API defines two types associated with events: The
--\family typewriter
--
--\newline
--ptl_event_kind_t
--\family default
-- type defines the kinds of events that can be stored in an event queue.
-- The
--\family typewriter
--ptl_event_t
--\family default
-- type defines a structure that holds the information associated with an
-- event.
--\layout Standard
--
--The Portals API also provides four functions for dealing with event queues:
-- The
--\emph on
--PtlEQAlloc
--\emph default
-- function is used to allocate the API resources needed for an event queue,
-- the
--\emph on
--PtlEQFree
--\emph default
-- function is used to release these resources, the
--\emph on
--PtlEQGet
--\emph default
-- function can be used to get the next event from an event queue, and the
--
--\emph on
--PtlEQWait
--\emph default
-- function can be used to block a process (or thread) until an event queue
-- has at least one event.
--\layout Subsection
--
--Kinds of Events
--\begin_inset LatexCommand \label{sec:ek-type}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef enum {
--\newline
-- PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL,
--\newline
-- PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL,
--\newline
-- PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL,
--\newline
-- PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL,
--\newline
-- PTL_EVENT_ACK,
--\newline
-- PTL_EVENT_UNLINK
--\newline
--} ptl_event_kind_t;
--\layout Standard
--\noindent
--The Portals API defines fourteen types of events that can be logged in an
-- event queue:
--\layout Description
--
--PTL_EVENT_GET_START A remote
--\emph on
--get
--\emph default
-- operation has been started on the memory descriptor.
-- The memory region associated with this descriptor should not be altered
-- until the corresponding END or FAIL event is logged.
--\layout Description
--
--PTL_EVENT_GET_END A previously initiated
--\emph on
--get
--\emph default
-- operation completed successfully.
-- This event is logged after the reply has been sent by the local node.
-- As such, the process could free the memory descriptor once it sees this
-- event.
--
--\layout Description
--
--PTL_EVENT_GET_FAIL A previously initiated
--\emph on
--get
--\emph default
-- operation completed unsuccessfully.
-- This event is logged after the reply has been sent by the local node.
-- As such, the process could free the memory descriptor once it sees this
-- event.
--
--\layout Description
--
--PTL_EVENT_PUT_START A remote
--\emph on
--put
--\emph default
-- operation has been started on the memory descriptor.
-- The memory region associated with this descriptor should be considered
-- volatile until the corresponding END or FAIL event is logged.
--\layout Description
--
--PTL_EVENT_PUT_END A previously initiated
--\emph on
--put
--\emph default
-- operation completed successfully.
-- The underlying layers will not alter the memory (on behalf of this operation)
-- once this event has been logged.
--
--\layout Description
--
--PTL_EVENT_PUT_FAIL A previously initiated
--\emph on
--put
--\emph default
-- operation completed unsuccessfully.
-- The underlying layers will not alter the memory (on behalf of this operation)
-- once this event has been logged.
--
--\layout Description
--
--PTL_EVENT_REPLY_START A
--\emph on
--reply
--\emph default
-- operation has been started on the memory descriptor.
--
--\layout Description
--
--PTL_EVENT_REPLY_END A previously initiated
--\emph on
--reply
--\emph default
-- operation has completed successfully.
-- This event is logged after the data (if any) from the reply has been written
-- into the memory descriptor.
--
--\layout Description
--
--PTL_EVENT_REPLY_FAIL A previously initiated
--\emph on
--reply
--\emph default
-- operation has completed unsuccessfully.
-- This event is logged after the data (if any) from the reply has been written
-- into the memory descriptor.
--
--\layout Description
--
--PTL_EVENT_ACK An
--\emph on
--acknowledgement
--\emph default
-- was received.
-- This event is logged when the acknowledgement is received.
--\layout Description
--
--PTL_EVENT_SEND_START An outgoing
--\emph on
--send
--\emph default
-- operation has been started.
-- The memory region associated with this descriptor should not be altered
-- until the corresponding END or FAIL event is logged.
--\layout Description
--
--PTL_EVENT_SEND_END A previously initiated
--\emph on
--send
--\emph default
-- operation has completed successfully.
-- This event is logged after the entire buffer has been sent and it is safe
-- for the application to reuse the buffer.
--
--\layout Description
--
--PTL_EVENT_SEND_FAIL A previously initiated
--\emph on
--send
--\emph default
-- operation has completed unsuccessfully.
-- The process can safely manipulate the memory or free the memory descriptor
-- once it sees this event.
--\layout Description
--
--PTL_EVENT_UNLINK A memory descriptor associated with this event queue has
-- been automatically unlinked.
-- This event is not generated when a memory descriptor is explicitly unlinked
-- by calling
--\shape italic
--PtlMDUnlink
--\shape default
--.
-- This event does not decrement the threshold count.
--\layout Subsection
--
--Event Ordering
--\layout Standard
--
--The Portals API guarantees that when a process initiates two operations
-- on a remote process, the operations will be initiated on the remote process
-- in the same order that they were initiated on the original process.
-- As an example, if process A initiates two 
--\emph on
--put
--\emph default
-- operations,
--\emph on
--x
--\emph default
-- and
--\emph on
--y
--\emph default
--, on process B, the Portals API guarantees that process A will receive the
--
--\family typewriter
--PTL_EVENT_SEND_START
--\family default
-- events for
--\emph on
--x
--\emph default
-- and
--\emph on
--y
--\emph default
-- in the same order that process B receives the
--\family typewriter
--PTL_EVENT_PUT_START
--\family default
-- events for
--\emph on
--x
--\emph default
-- and
--\emph on
--y
--\emph default
--.
-- Notice that the API does not guarantee that the start events will be delivered
-- in the same order that process A initiated the
--\emph on
--x
--\emph default
-- and
--\emph on
--y
--\emph default
-- operations.
-- If process A needs to ensure the ordering of these operations, it should
-- include code to wait for the initiation of
--\emph on
--x
--\emph default
-- before it initiates
--\emph on
--y
--\emph default
--.
--\layout Subsection
--
--Failure Notification
--\layout Standard
--
--Operations may fail to complete successfully; however, unless the node itself
-- fails, every operation that is started will eventually complete.
-- While an operation is in progress, the memory associated with the operation
-- should not be viewed (in the case of a put or a reply) or altered (in the
-- case of a send or get).
-- Operation completion, whether successful or unsuccessful, is final.
-- That is, when an operation completes, the memory associated with the operation
-- will no longer be read or altered by the operation.
-- A network interface can use the
--\family typewriter
--ptl_ni_fail_t
--\family default
-- type to define more specific information regarding the failure of the operation
-- and record this information in the
--\family typewriter
--ni_fail_type
--\family default
-- field of the event.
--\layout Subsection
--
--The Event Type
--\begin_inset LatexCommand \label{sec:event-type}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef struct {
--\newline
-- ptl_event_kind_t type;
--\newline
-- ptl_process_id_t initiator;
--\newline
-- ptl_uid_t uid;
--\layout LyX-Code
--
-- ptl_pt_index_t portal;
--\newline
-- ptl_match_bits_t match_bits;
--\newline
-- ptl_size_t rlength;
--\newline
-- ptl_size_t mlength;
--\newline
-- ptl_size_t offset;
--\newline
-- ptl_handle_md_t md_handle;
--\newline
-- ptl_md_t mem_desc;
--\newline
-- ptl_hdr_data_t hdr_data;
--\newline
-- ptl_seq_t link;
--\newline
-- ptl_ni_fail_t ni_fail_type;
--\newline
-- volatile ptl_seq_t sequence;
--\newline
--} ptl_event_t;
--\layout Standard
--\noindent
--An event structure includes the following members:
--\layout Description
--
--type Indicates the type of the event.
--
--\layout Description
--
--initiator The id of the initiator.
--
--\layout Description
--
--portal The Portal table index specified in the request.
--
--\layout Description
--
--match_bits A copy of the match bits specified in the request.
-- See section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:me}
--
--\end_inset
--
-- for more information on match bits.
--
--\layout Description
--
--rlength The length (in bytes) specified in the request.
--
--\layout Description
--
--mlength The length (in bytes) of the data that was manipulated by the operation.
-- For truncated operations, the manipulated length will be the number of
-- bytes specified by the memory descriptor (possibly with an offset).
-- For all other operations, the manipulated length will be the length of
-- the requested operation.
--
--\layout Description
--
--offset Is the displacement (in bytes) into the memory region that the operation
-- used.
-- The offset can be determined by the operation (see Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:datamovement}
--
--\end_inset
--
--) for a remote managed memory descriptor, or by the local memory descriptor
-- (see Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--).
--
--\layout Description
--
--md_handle Is the handle to the memory descriptor associated with the event.
--\layout Description
--
--mem_desc Is the state of the memory descriptor immediately after the event
-- has been processed.
--
--\layout Description
--
--hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--).
--
--\layout Description
--
--link The
--\emph on
--link
--\emph default
-- member is used to link
--\family typewriter
--START
--\family default
-- events with the
--\family typewriter
--END
--\family default
-- or
--\family typewriter
--FAIL
--\family default
-- event that signifies completion of the operation.
-- The
--\emph on
--link
--\emph default
-- member will be the same for the two events associated with an operation.
-- The link member is also used to link an
--\family typewriter
--UNLINK
--\family default
-- event with the event that caused the memory descriptor to be unlinked.
--\layout Description
--
--sequence The sequence number for this event.
-- Sequence numbers are unique to each event.
--\layout Comment
--
--The
--\emph on
--sequence
--\emph default
-- member is the last member and is volatile to support SMP implementations.
-- When an event structure is filled in, the
--\emph on
--sequence
--\emph default
-- member should be written after all other members have been updated.
-- Moreover, a memory barrier should be inserted between the updating of other
-- members and the updating of the
--\emph on
--sequence
--\emph default
-- member.
--\layout Subsection
--
--PtlEQAlloc
--\begin_inset LatexCommand \label{sec:eqalloc}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlEQAlloc( ptl_handle_ni_t interface,
--\newline
-- ptl_size_t count,
--\newline
-- ptl_handle_eq_t* handle );
--\layout Standard
--\noindent
--The
--\emph on
--PtlEQAlloc
--\emph default
-- function is used to build an event queue.
--
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_NOSPACE Indicates that there is insufficient memory to allocate the
-- event queue.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--handle
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="3" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the interface with which the event queue will be associated.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--count
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The number of events that can be stored in the event queue.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--handle
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold a handle for the newly created
-- event queue.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlEQFree
--\begin_inset LatexCommand \label{sec:eqfree}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlEQFree( ptl_handle_eq_t eventq );
--\layout Standard
--\noindent
--The
--\emph on
--PtlEQFree
--\emph default
-- function releases the resources associated with an event queue.
-- It is up to the user to ensure that no memory descriptors are associated
-- with the event queue once it is freed.
--
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_EQ Indicates that
--\family typewriter
--eventq
--\family default
-- is not a valid event queue handle.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="1" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--eventq
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--A handle for the event queue to be released.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlEQGet
--\begin_inset LatexCommand \label{sec:eqget}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlEQGet( ptl_handle_eq_t eventq,
--\newline
-- ptl_event_t* event );
--\layout Standard
--\noindent
--The
--\emph on
--PtlEQGet
--\emph default
-- function is a nonblocking function that can be used to get the next event
-- in an event queue.
-- The event is removed from the queue.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
-- least one event between this event and the last event obtained (using
--\emph on
--PtlEQGet
--\emph default
-- or
--\emph on
--PtlEQWait
--\emph default
--) from this event queue has been dropped due to limited space in the event
-- queue.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_EQ_EMPTY Indicates that
--\family typewriter
--eventq
--\family default
-- is empty or another thread is waiting on
--\emph on
--PtlEQWait
--\emph default
--.
--
--\layout Description
--
--PTL_INV_EQ Indicates that
--\family typewriter
--eventq
--\family default
-- is not a valid event queue handle.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--event
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="2" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.5in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--eventq
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the event queue.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--event
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the values associated with
-- the next event in the event queue.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlEQWait
--\begin_inset LatexCommand \label{sec:eqwait}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlEQWait( ptl_handle_eq_t eventq,
--\newline
-- ptl_event_t* event );
--\layout Standard
--\noindent
--The
--\emph on
--PtlEQWait
--\emph default
-- function can be used to block the calling process (thread) until there
-- is an event in an event queue.
-- This function also returns the next event in the event queue and removes
-- this event from the queue.
-- This is the only blocking operation in the Portals 3.2 API.
-- In the event that multiple threads are waiting on the same event queue,
-- PtlEQWait is guaranteed to wake exactly one thread, but the order in which
-- they are awakened is not specified.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
-- least one event between this event and the last event obtained (using
--\emph on
--PtlEQGet
--\emph default
-- or
--\emph on
--PtlEQWait
--\emph default
--) from this event queue has been dropped due to limited space in the event
-- queue.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_EQ Indicates that
--\family typewriter
--eventq
--\family default
-- is not a valid event queue handle.
--
--\layout Description
--
--PTL_SEGV Indicates that
--\family typewriter
--event
--\family default
-- is not a legal address.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--\noindent
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="2" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--eventq
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the event queue to wait on.
-- The calling process (thread) will be blocked until
--\family typewriter
--eventq
--\family default
-- is not empty.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--event
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--output
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--On successful return, this location will hold the values associated with
-- the next event in the event queue.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Section
--
--The Access Control Table
--\begin_inset LatexCommand \label{sec:ac}
--
--\end_inset
--
--
--\layout Standard
--
--Processes can use the access control table to control which processes are
-- allowed to perform operations on Portal table entries.
-- Each communication interface has a Portal table and an access control table.
-- The access control table for the default interface contains an entry at
-- index zero that allows all processes with the same user id to communicate.
-- Entries in the access control table can be manipulated using the
--\emph on
--PtlACEntry
--\emph default
-- function.
--\layout Subsection
--
--PtlACEntry
--\begin_inset LatexCommand \label{sec:acentry}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlACEntry( ptl_handle_ni_t interface,
--\newline
-- ptl_ac_index_t index,
--\newline
-- ptl_process_id_t matchid,
--\newline
-- ptl_uid_t user_id,
--\newline
-- ptl_pt_index_t portal );
--\layout Standard
--\noindent
--The
--\emph on
--PtlACEntry
--\emph default
-- function can be used to update an entry in the access control table for
-- an interface.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_NI Indicates that
--\family typewriter
--interface
--\family default
-- is not a valid network interface handle.
--
--\layout Description
--
--PTL_AC_INV_INDEX Indicates that
--\family typewriter
--index
--\family default
-- is not a valid access control table index.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--matchid
--\family default
-- is not a valid process identifier.
--
--\layout Description
--
--PTL_PT_INV_INDEX Indicates that
--\family typewriter
--portal
--\family default
-- is not a valid Portal table index.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="5" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--interface
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Identifies the interface to use.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--index
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The index of the entry in the access control table to update.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--matchid
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Identifies the process(es) that are allowed to perform operations.
-- The constants
--\family typewriter
--PTL_PID_ANY
--\family default
-- and
--\family typewriter
--PTL_NID_ANY
--\family default
-- can be used to wildcard either of the ids in the
--\family typewriter
--ptl_process_id_t
--\family default
-- structure.
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--user_id
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Identifies the user that is allowed to perform operations.
-- The value
--\family typewriter
--PTL_UID_ANY
--\family default
-- can be used to wildcard the user.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--portal
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Identifies the Portal index(es) that can be used.
-- The value
--\family typewriter
--PTL_PT_INDEX_ANY
--\family default
-- can be used to wildcard the Portal index.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Section
--
--Data Movement Operations
--\begin_inset LatexCommand \label{sec:datamovement}
--
--\end_inset
--
--
--\layout Standard
--
--The Portals API provides two data movement operations:
--\emph on
--PtlPut
--\emph default
-- and
--\emph on
--PtlGet
--\emph default
--.
--\layout Subsection
--
--PtlPut
--\begin_inset LatexCommand \label{sec:put}
--
--\end_inset
--
--
--\layout LyX-Code
--
--typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t;
--\newline
--
--\newline
--int PtlPut( ptl_handle_md_t mem_desc,
--\newline
-- ptl_ack_req_t ack_req,
--\newline
-- ptl_process_id_t target,
--\newline
-- ptl_pt_index_t portal,
--\newline
-- ptl_ac_index_t cookie,
--\newline
-- ptl_match_bits_t match_bits,
--\newline
-- ptl_size_t offset,
--\newline
-- ptl_hdr_data_t hdr_data );
--\layout Standard
--\noindent
--Values of the type
--\family typewriter
--ptl_ack_req_t
--\family default
-- are used to control whether an acknowledgement should be sent when the
-- operation completes (i.e., when the data has been written to a memory descriptor
-- of the
--\family typewriter
--target
--\family default
-- process).
-- The value
--\family typewriter
--PTL_ACK_REQ
--\family default
-- requests an acknowledgement; the value 
--\family typewriter
--PTL_NOACK_REQ
--\family default
-- requests that no acknowledgement should be generated.
--\layout Standard
--
--The
--\emph on
--PtlPut
--\emph default
-- function initiates an asynchronous put operation.
-- There are several events associated with a put operation: initiation of
-- the send on the local node (
--\family typewriter
--PTL_EVENT_SEND_START
--\family default
--), completion of the send on the local node (
--\family typewriter
--PTL_EVENT_SEND_END
--\family default
-- or
--\family typewriter
--PTL_EVENT_SEND_FAIL
--\family default
--), and, when the send completes successfully, the receipt of an acknowledgement
-- (
--\family typewriter
--PTL_EVENT_ACK
--\family default
--) indicating that the operation was accepted by the target.
-- These events will be logged in the event queue associated with the memory
-- descriptor (
--\family typewriter
--mem_desc
--\family default
--) used in the put operation.
-- Using a memory descriptor that does not have an associated event queue
-- results in these events being discarded.
-- In this case, the application must have another mechanism (e.g., a higher
-- level protocol) for determining when it is safe to modify the memory region
-- associated with the memory descriptor.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_MD Indicates that
--\family typewriter
--mem_desc
--\family default
-- is not a valid memory descriptor.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--target
--\family default
-- is not a valid process id.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="8" columns="3">
--<features>
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the memory descriptor that describes the memory to be sent.
-- If the memory descriptor has an event queue associated with it, it will
-- be used to record events when the message has been sent (PTL_EVENT_SEND_START,
-- PTL_EVENT_SEND_END).
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ack_req
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--Controls whether an acknowledgement event is requested.
-- Acknowledgements are only sent when they are requested by the initiating
-- process
--\series bold
--and
--\series default
-- the memory descriptor has an event queue
--\series bold
--and
--\series default
-- the target memory descriptor enables them.
-- Allowed constants:
--\family typewriter
--PTL_ACK_REQ
--\family default
--,
--\family typewriter
--PTL_NOACK_REQ
--\family default
--.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--target
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A process id for the target process.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--portal
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The index in the remote Portal table.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--cookie
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The index into the access control table of the target process.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--match_bits
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The match bits to use for message selection at the target process.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--offset
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The offset into the target memory descriptor (only used when the target
-- memory descriptor has the
--\family typewriter
--PTL_MD_MANAGE_REMOTE
--\family default
-- option set).
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--hdr_data
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--64 bits of user data that can be included in the message header.
-- This data is written to an event queue entry at the target if an event
-- queue is present on the matching memory descriptor.
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Subsection
--
--PtlGet
--\begin_inset LatexCommand \label{sec:get}
--
--\end_inset
--
--
--\layout LyX-Code
--
--int PtlGet( ptl_handle_md_t mem_desc,
--\newline
-- ptl_process_id_t target,
--\newline
-- ptl_pt_index_t portal,
--\newline
-- ptl_ac_index_t cookie,
--\newline
-- ptl_match_bits_t match_bits,
--\newline
-- ptl_size_t offset );
--\layout Standard
--\noindent
--The
--\emph on
--PtlGet
--\emph default
-- function initiates a remote read operation.
-- There are two event pairs associated with a get operation: when the data
-- is sent from the remote node, a
--\family typewriter
--PTL_EVENT_GET_{START|END}
--\family default
-- event pair is registered on the remote node; and when the data is returned
-- from the remote node a
--\family typewriter
--PTL_EVENT_REPLY_{START|END}
--\family default
-- event pair is registered on the local node.
--\layout Subsubsection
--
--Return Codes
--\layout Description
--
--PTL_OK Indicates success.
--
--\layout Description
--
--PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
--
--\layout Description
--
--PTL_INV_MD Indicates that
--\family typewriter
--mem_desc
--\family default
-- is not a valid memory descriptor.
--
--\layout Description
--
--PTL_INV_PROC Indicates that
--\family typewriter
--target
--\family default
-- is not a valid process id.
--
--\layout Subsubsection
--
--Arguments
--\layout Standard
--
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="6" columns="3">
--<features>
--<column alignment="right" valignment="top" width="0pt">
--<column alignment="center" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="4.7in">
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A handle for the memory descriptor that describes the memory into which
-- the requested data will be received.
-- The memory descriptor can have an event queue associated with it to record
-- events, such as when the message receive has started (
--\family typewriter
--PTL_EVENT_REPLY
--\family default
--_
--\family typewriter
--START
--\family default
--).
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--target
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--A process id for the target process.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--portal
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The index in the remote Portal table.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--cookie
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The index into the access control table of the target process.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--match_bits
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The match bits to use for message selection at the target process.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--offset
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--input
--\end_inset
--</cell>
--<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--The offset into the target memory descriptor (only used when the target
-- memory descriptor has the
--\family typewriter
--PTL_MD_MANAGE_REMOTE
--\family default
-- option set).
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\layout Section
--
--Summary
--\layout Standard
--
--
--\begin_inset LatexCommand \label{sec:summary}
--
--\end_inset
--
-- We conclude this section by summarizing the names introduced by the Portals
-- 3.2 API.
-- We start by summarizing the names of the types introduced by the API.
-- This is followed by a summary of the functions introduced by the API.
-- That, in turn, is followed by a summary of the function return codes.
-- Finally, we conclude with a summary of the other constant values introduced
-- by the API.
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:types}
--
--\end_inset
--
-- presents a summary of the types defined by the Portals API.
-- The first column in this table gives the type name, the second column gives
-- a brief description of the type, the third column identifies the section
-- where the type is defined, and the fourth column lists the functions that
-- have arguments of this type.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Types Defined by the Portals 3.2 API
--\begin_inset LatexCommand \label{tab:types}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\noindent
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="25" columns="4">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="2in">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="2.2in">
--<row bottomline="true">
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
-- Name
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
-- Meaning
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
-- Sect
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
-- Functions
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ac_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--indexes for an access control table
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:index-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlACEntry, PtlPut, PtlGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ack_req_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--acknowledgement request types
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlPut
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--kinds of events
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--information about events
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:event-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlEQGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_seq_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--event sequence number
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:event-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlEQGet, PtlEQWait
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_any_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--handles for any object
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIHandle
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_eq_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--handles for event queues
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_md_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--handles for memory descriptors
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert,
-- PtlPut, PtlGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_me_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--handles for match entries
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_ni_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--handles for network interfaces
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut,
-- PtlGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_nid_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--node identifiers
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlGetId, PtlACEntry
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--process identifier
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlGetId, PtlACEntry
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_uid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--user identifier
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlGetUid, PtlACEntry
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ins_pos_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--insertion position (before or after)
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMEAttach, PtlMEAttachAny, PtlMEInsert
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_interface_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--identifiers for network interfaces
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_match_bits_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--match (and ignore) bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:mb-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_md_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--memory descriptors
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDAttach, PtlMDUpdate
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ni_fail_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--network interface-specific failures
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlEQGet, PtlEQWait
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--process identifiers
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:pid-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pt_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--indexes for Portal tables
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:index-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMEAttach, PtlMEAttachAny, PtlACEntry
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--sizes
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:size-t}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlEQAlloc, PtlPut, PtlGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_sr_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--indexes for status registers
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:stat-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIStatus
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_sr_value_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--values in status registers
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:stat-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIStatus
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_unlink_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--unlink options
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:func}
--
--\end_inset
--
-- presents a summary of the functions defined by the Portals API.
-- The first column in this table gives the name for the function, the second
-- column gives a brief description of the operation implemented by the function,
-- and the third column identifies the section where the function is defined.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Functions Defined by the Portals 3.2 API
--\begin_inset LatexCommand \label{tab:func}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="24" columns="3">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--Name
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Operation
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- Section
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlACEntry
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- update an entry in an access control table
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ac}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlEQAlloc
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- create an event queue
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlEQGet
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- get the next event from an event queue
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlEQFree
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- release the resources for an event queue
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlEQWait
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- wait for a new event in an event queue
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:eq}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlFini
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- shutdown the Portals API
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:init}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlGet
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- perform a get operation
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:datamovement}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlGetId
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- get the id for the current process
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:pid}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlInit
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- initialize the Portals API
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:init}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMDAttach
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- create a memory descriptor and attach it to a match entry
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMDBind
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- create a free-floating memory descriptor
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:mdbind}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMDUnlink
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- remove a memory descriptor from a list and release its resources
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMDUpdate
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- update a memory descriptor
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMEAttach
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--create a match entry and attach it to a Portal table
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:me}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlMEAttachAny
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--create a match entry and attach it to a free Portal table entry
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:attachany}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMEInsert
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- create a match entry and insert it in a list
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:me}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlMEUnlink
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- remove a match entry from a list and release its resources
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:me}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlNIDist
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- get the distance to another process
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlNIFini
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- shutdown a network interface
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlNIHandle
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- get the network interface handle for an object
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlNIInit
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- initialize a network interface
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlNIStatus
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- read a network interface status register
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- PtlPut
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- perform a put operation
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:datamovement}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:retcodes}
--
--\end_inset
--
-- summarizes the return codes used by functions defined by the Portals API.
-- All of these constants are integer values.
-- The first column of this table gives the symbolic name for the constant,
-- the second column gives a brief description of the value, and the third
-- column identifies the functions that can return this value.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Function Return Codes for the Portals 3.2 API
--\begin_inset LatexCommand \label{tab:retcodes}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="27" columns="3">
--<features>
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="2.6in">
--<row bottomline="true">
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Name
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Meaning
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Functions
--\series default
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_AC_INV_INDEX
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid access control table index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlACEntry
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EQ_DROPPED
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--at least one event has been dropped
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlEQGet, PtlEQWait
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EQ_EMPTY
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--no events available in an event queue
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlEQGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_FAIL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--error during initialization or cleanup
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlInit, PtlFini
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_ILL_MD
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--illegal memory descriptor values
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDAttach, PtlMDBind, PtlMDUpdate
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INIT_DUP
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--duplicate initialization of an interface
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INIT_INV
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--initialization of an invalid interface
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INUSE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--the ME already has an MD
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDAttach
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_ASIZE
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid access control table size
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_EQ
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid event queue handle
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDUpdate, PtlEQFree, PtlEQGet
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_HANDLE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid handle
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIHandle
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_MD
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid memory descriptor handle
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDUnlink, PtlMDUpdate
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_ME
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid match entry handle
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlMDAttach
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_NI
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid network interface handle
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_PROC
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid process identifier
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_PTINDEX
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid Portal table index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlMEAttach
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_REG
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid status register
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlNIStatus
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INV_SR_INDX
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--invalid status register index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlNIStatus
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_ML_TOOLONG
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--match list too long
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlMEAttach, PtlMEInsert
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_INUSE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--MD has pending operations
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlMDUnlink
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_NOINIT
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--uninitialized API
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\emph on
--all
--\emph default
--, except PtlInit
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_NOSPACE
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--insufficient memory
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_NOUPDATE
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- no update was performed
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
-- PtlMDUpdate
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_PT_FULL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--Portal table is full
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--PtlMEAttachAny
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_OK
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- success
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--
--\emph on
--all
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_SEGV
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--addressing violation
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--\noindent
--PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate,
-- PtlEQAlloc, PtlEQGet, PtlEQWait
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:oconsts}
--
--\end_inset
--
-- summarizes the remaining constant values introduced by the Portals API.
-- The first column in this table presents the symbolic name for the constant,
-- the second column gives a brief description of the value, the third column
-- identifies the type for the value, the fourth column identifies the section
-- in which the value is introduced, and the fifth column identifies the
-- sections in which the value is referenced.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Other Constants Defined by the Portals 3.2 API
--\begin_inset LatexCommand \label{tab:oconsts}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="36" columns="5">
--<features>
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Name
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Meaning
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Base type
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Intr.
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Ref.
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_ACK_REQ
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--request an acknowledgement
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ack_req_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EQ_NONE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--a NULL event queue handle
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_eq_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:handle-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md}
--
--\end_inset
--
--,
--\begin_inset LatexCommand \ref{sec:mdupdate}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_GET_START
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--get event start
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_GET_END
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--get event end
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_GET_FAIL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--get event fail
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_PUT_START
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--put event start
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_PUT_END
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--put event end
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_PUT_FAIL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--put event fail
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_REPLY_START
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--reply event start
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_REPLY_END
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--reply event end
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_REPLY_FAIL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--reply event fail
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_ACK_START
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--acknowledgement event start
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_ACK_END
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--acknowledgement event end
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_ACK_FAIL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--acknowledgement event fail
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_SEND_START
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--send event start
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_SEND_END
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--send event end
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_SEND_FAIL
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--send event fail
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_EVENT_UNLINK
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--unlink event
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_event_kind_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ek-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_PID_ANY
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--wildcard for process id fields
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--,
--\begin_inset LatexCommand \ref{sec:acentry}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_NID_ANY
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--wildcard for node id fields
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_nid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--,
--\begin_inset LatexCommand \ref{sec:acentry}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_UID_ANY
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--wildcard for user id
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_uid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:id-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meattach}
--
--\end_inset
--
--,
--\begin_inset LatexCommand \ref{sec:acentry}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_IFACE_DEFAULT
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--default interface
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_interface_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:ni-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INS_AFTER
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--insert after
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ins_pos_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meinsert}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_INS_BEFORE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--insert before
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ins_pos_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:meinsert}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_ACK_DISABLE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--a flag to disable acknowledgements
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_MANAGE_REMOTE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--a flag to enable the use of remote offsets
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--,
--\begin_inset LatexCommand \ref{sec:get}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_OP_GET
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--a flag to enable get operations
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_OP_PUT
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--a flag to enable put operations
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_THRESH_INF
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--infinite threshold for a memory descriptor
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_MD_TRUNCATE
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--a flag to enable truncation of a request
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:md-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_NOACK_REQ
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--request no acknowledgement
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ack_req_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:put}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_PT_INDEX_ANY
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--wildcard for Portal indexes
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pt_index_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:acentry}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_RETAIN
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--disable unlinking
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_unlink_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:mdattach}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_SR_DROP_COUNT
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--index for the dropped count register
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_sr_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:stat-type}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:nistatus}
--
--\end_inset
--
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--PTL_UNLINK
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--enable unlinking
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_unlink_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\begin_inset LatexCommand \ref{sec:mdattach}
--
--\end_inset
--
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Chapter
--
--The Semantics of Message Transmission
--\begin_inset LatexCommand \label{sec:semantics}
--
--\end_inset
--
--
--\layout Standard
--
--The portals API uses four types of messages: put requests, acknowledgements,
-- get requests, and replies.
-- In this chapter, we describe the information passed on the wire for each
-- type of message.
-- We also describe how this information is used to process incoming messages.
--\layout Section
--
--Sending Messages
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:put-wire}
--
--\end_inset
--
-- summarizes the information that is transmitted for a put request.
-- The first column provides a descriptive name for the information, the second
-- column provides the type for this information, the third column identifies
-- the source of the information, and the fourth column provides additional
-- notes.
-- Most information that is transmitted is obtained directly from the
--\emph on
--PtlPut
--\emph default
-- operation.
-- Notice that the handle for the memory descriptor used in the
--\emph on
--PtlPut
--\emph default
-- operation is transmitted even though this value cannot be interpreted by
-- the target.
-- A value of anything other than
--\family typewriter
--PTL_MD_NONE
--\family default
-- is interpreted as a request for an acknowledgement.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Information Passed in a Put Request
--\begin_inset LatexCommand \label{tab:put-wire}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="12" columns="4">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Information
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Type
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--\emph on
--PtlPut
--\emph default
-- arg
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Notes
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--operation
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--indicates a put request
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--initiator
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--local information
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--user
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_uid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--local information
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--target
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--portal index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pt_index_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--portal
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--cookie
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ac_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--cookie
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--match bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_match_bits_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--match_bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--offset
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--memory desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_md_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--no ack if
--\family typewriter
--PTL_MD_NONE
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--length
--\family default
-- member
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--data
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family roman
--\emph on
--bytes
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--start
--\family default
-- and
--\family typewriter
--length
--\family default
-- members
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:ack-wire}
--
--\end_inset
--
-- summarizes the information transmitted in an acknowledgement.
-- Most of the information is simply echoed from the put request.
-- Notice that the initiator and target are obtained directly from the put
-- request, but are swapped in generating the acknowledgement.
-- The only new piece of information in the acknowledgement is the manipulated
-- length which is determined as the put request is satisfied.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Information Passed in an Acknowledgement
--\begin_inset LatexCommand \label{tab:ack-wire}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="10" columns="4">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Information
--\series default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Type
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Put Information
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Notes
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--operation
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- indicates an acknowledgement
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- initiator
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- initiator
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- portal index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pt_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- portal index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- match bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_match_bits_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- match bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- memory desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
-- ptl_handle_md_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- memory desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- requested length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
-- ptl_size_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- manipulated length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
-- ptl_size_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
-- obtained from the operation
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:get-wire}
--
--\end_inset
--
-- summarizes the information that is transmitted for a get request.
-- Like the information transmitted in a put request, most of the information
-- transmitted in a get request is obtained directly from the
--\emph on
--PtlGet
--\emph default
-- operation.
-- Unlike put requests, get requests do not include the event queue handle.
-- In this case, the reply is generated whenever the operation succeeds and
-- the memory descriptor must not be unlinked until the reply is received.
-- As such, there is no advantage to explicitly sending the event queue handle.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Information Passed in a Get Request
--\begin_inset LatexCommand \label{tab:get-wire}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="11" columns="4">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Information
--\series default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Type
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--\emph on
--PtlGet
--\emph default
-- argument
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Notes
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--operation
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--indicates a get request
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--initiator
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--local information
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--user
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_uid_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--local information
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--portal index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pt_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--portal
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--cookie
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_ac_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--cookie
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--match bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_match_bits_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--match_bits
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--memory desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_md_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\family default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--mem_desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--length
--\family default
-- member
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--Table\SpecialChar ~
--
--\begin_inset LatexCommand \ref{tab:reply-wire}
--
--\end_inset
--
-- summarizes the information transmitted in a reply.
-- Like an acknowledgement, most of the information is simply echoed from
-- the get request.
-- The initiator and target are obtained directly from the get request, but
-- are swapped in generating the reply.
-- The only new pieces of information in the reply are the manipulated length
-- and the data, which are determined as the get request is satisfied.
--\layout Standard
--
--
--\begin_inset Float table
--placement htbp
--wide false
--collapsed false
--
--\layout Caption
--
--Information Passed in a Reply
--\begin_inset LatexCommand \label{tab:reply-wire}
--
--\end_inset
--
--
--\layout Standard
--
--
--\begin_inset ERT
--status Collapsed
--
--\layout Standard
--
--\backslash
--medskip
--\end_inset
--
--
--\layout Standard
--\align center
--
--\size small
--
--\begin_inset Tabular
--<lyxtabular version="3" rows="11" columns="4">
--<features firstHeadEmpty="true">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<column alignment="left" valignment="top" width="0pt">
--<row bottomline="true">
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Information
--\series default
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Type
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Get Information
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\series bold
--Notes
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--operation
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--int
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--indicates a reply
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--initiator
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--target
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_process_id_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--initiator
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--portal index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_pt_index_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--portal index
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" bottomline="true" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--match bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_match_bits_t
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--match bits
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--offset
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--memory desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_handle_md_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--memory desc
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--requested length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--echo
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--manipulated length
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\family typewriter
--ptl_size_t
--\family default
--
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--obtained from the operation
--\end_inset
--</cell>
--</row>
--<row>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--data
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--
--\emph on
--bytes
--\end_inset
--</cell>
--<cell alignment="left" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--\end_inset
--</cell>
--<cell alignment="right" valignment="top" usebox="none">
--\begin_inset Text
--
--\layout Standard
--
--obtained from the operation
--\end_inset
--</cell>
--</row>
--</lyxtabular>
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Section
--
--Receiving Messages
--\begin_inset LatexCommand \label{sec:receiving}
--
--\end_inset
--
--
--\layout Standard
--
--When an incoming message arrives on a network interface, the communication
-- system first checks that the target process identified in the request is
-- a valid process that has initialized the network interface (i.e., that the
-- target process has a valid Portal table).
-- If this test fails, the communication system discards the message and increment
--s the dropped message count for the interface.
-- The remainder of the processing depends on the type of the incoming message.
-- Put and get messages are subject to access control checks and translation
-- (searching a match list), while acknowledgement and reply messages bypass
-- the access control checks and the translation step.
--\layout Standard
--
--Acknowledgement messages include a handle for the memory descriptor used
-- in the original
--\emph on
--PtlPut
--\emph default
-- operation.
-- This memory descriptor will identify the event queue where the event should
-- be recorded.
-- Upon receipt of an acknowledgement, the runtime system only needs to confirm
-- that the memory descriptor and event queue still exist and that there is
-- space for another event.
-- Should any of these conditions fail, the message is simply discarded
-- and the dropped message count for the interface is incremented.
-- Otherwise, the system builds an acknowledgement event from the information
-- in the acknowledgement message and adds it to the event queue.
--\layout Standard
--
--Reception of reply messages is also relatively straightforward.
-- Each reply message includes a handle for a memory descriptor.
-- If this descriptor exists, it is used to receive the message.
-- A reply message will be dropped if the memory descriptor identified in
-- the request doesn't exist.
-- In this case, the dropped message count for the interface is
-- incremented.
-- These are the only reasons for dropping reply messages.
-- Every memory descriptor accepts and truncates incoming reply messages,
-- eliminating the other potential reasons for rejecting a reply message.
--\layout Standard
--
--The critical step in processing an incoming put or get request involves
-- mapping the request to a memory descriptor.
-- This step starts by using the Portal index in the incoming request to identify
-- a list of match entries.
-- This list of match entries is searched in order until a match entry is
-- found whose match criteria matches the match bits in the incoming request
-- and whose memory descriptor accepts the request.
--\layout Standard
--
--Because acknowledge and reply messages are generated in response to requests
-- made by the process receiving these messages, the checks performed by the
-- runtime system for acknowledgements and replies are minimal.
-- In contrast, put and get messages are generated by remote processes and
-- the checks performed for these messages are more extensive.
-- Incoming put or get messages may be rejected because:
--\layout Itemize
--
--the Portal index supplied in the request is not valid;
--\layout Itemize
--
--the cookie supplied in the request is not a valid access control entry;
--
--\layout Itemize
--
--the access control entry identified by the cookie does not match the identifier
-- of the requesting process;
--\layout Itemize
--
--the access control entry identified by the cookie does not
-- match the Portal index supplied in the request; or
--\layout Itemize
--
--the match bits supplied in the request do not match any of the match entries
-- with a memory descriptor that accepts the request.
--
--\layout Standard
--
--In all cases, if the message is rejected, the incoming message is discarded
-- and the dropped message count for the interface is incremented.
--\layout Standard
--
--A memory descriptor may reject an incoming request for any of the following
-- reasons:
--\layout Itemize
--
--the
--\family typewriter
--PTL_MD_PUT
--\family default
-- or
--\family typewriter
--PTL_MD_GET
--\family default
-- option has not been enabled and the operation is put or get, respectively;
--
--\layout Itemize
--
--the length specified in the request is too long for the memory descriptor
-- and the
--\family typewriter
--PTL_MD_TRUNCATE
--\family default
-- option has not been enabled.
--\layout Chapter
--
--Examples
--\begin_inset LatexCommand \label{sec:examples}
--
--\end_inset
--
--
--\layout Comment
--
--The examples presented in this chapter have not been updated to reflect
-- the current API.
--\layout Standard
--
--In this section we present several examples to illustrate expected usage
-- patterns for the Portals 3.2 API.
-- The first example describes how to implement parallel servers using the
-- features of the Portals 3.2 API.
-- This example covers the access control list and the use of remote managed
-- offsets.
-- The second example presents an approach to dealing with dropped requests.
-- This example covers aspects of match lists and memory descriptors.
-- The final example covers message reception in MPI.
-- This example illustrates more sophisticated uses of matching and a procedure
-- to update a memory descriptor.
--\layout Section
--
--Parallel File Servers
--\begin_inset LatexCommand \label{sec:expfs}
--
--\end_inset
--
--
--\layout Standard
--
--Figure\SpecialChar ~
--
--\begin_inset LatexCommand \ref{fig:file}
--
--\end_inset
--
-- illustrates the logical structure of a parallel file server.
-- In this case, the parallel server consists of four servers that stripe
-- application data across four disks.
-- We would like to present applications with the illusion that the file server
-- is a single entity.
-- We will assume that all of the processes that constitute the parallel server
-- have the same user id.
--\layout Standard
--
--
--\begin_inset Float figure
--placement htbp
--wide false
--collapsed false
--
--\layout Standard
--\align center
--
--\begin_inset Graphics FormatVersion 1
-- filename file.eps
-- display color
-- size_type 0
-- rotateOrigin center
-- lyxsize_type 1
-- lyxwidth 196pt
-- lyxheight 147pt
--\end_inset
--
--
--\layout Caption
--
--Parallel File Server
--\begin_inset LatexCommand \label{fig:file}
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--When an application establishes a connection to the parallel file server,
-- it will allocate a Portal and access control list entry for communicating
-- with the server.
-- The access control list entry will include the Portal and match any process
-- in the parallel file server, so all of the file server processes will
-- have access to the portal.
-- The Portal information and access control entry will be sent to the file
-- server at this time.
-- If the application and server need to have multiple, concurrent I/O operations,
-- they can use additional portals or match entries to keep the operations
-- from interfering with one another.
--\layout Standard
--
--When an application initiates an I/O operation, it first builds a memory
-- descriptor that describes the memory region involved in the operation.
-- This memory descriptor will enable the appropriate operation (put for read
-- operations and get for write operations) and enable the use of remote offsets
-- (this lets the servers decide where their data should be placed in the
-- memory region).
-- After creating the memory descriptor and linking it into the appropriate
-- Portal entry, the application sends a read or write request (using
--\emph on
--PtlPut
--\emph default
--) to one of the file server processes.
-- The file server processes can then use put or get operations with the appropria
--te offsets to fill or retrieve the contents of the application's buffer.
-- To know when the operation has completed, the application can add an event
-- queue to the memory descriptor and add up the lengths of the remote operations
-- until the sum is the size of the requested I/O operation.
--\layout Section
--
--Dealing with Dropped Requests
--\begin_inset LatexCommand \label{sec:exdrop}
--
--\end_inset
--
--
--\layout Standard
--
--If a process does not anticipate unexpected requests, they will be discarded.
-- Applications using the Portals API can query the dropped count for the
-- interface to determine the number of requests that have been dropped (see
-- Section\SpecialChar ~
--
--\begin_inset LatexCommand \ref{sec:nistatus}
--
--\end_inset
--
--).
-- While this approach minimizes resource consumption, it does not provide
-- information that might be critical in debugging the implementation of a
-- higher level protocol.
--\layout Standard
--
--To keep track of more information about dropped requests, we use a memory
-- descriptor that truncates each incoming request to zero bytes and logs
-- the
--\begin_inset Quotes eld
--\end_inset
--
--dropped
--\begin_inset Quotes erd
--\end_inset
--
-- operations in an event queue.
-- Note that the operations are not dropped in the Portals sense, because
-- the operation succeeds.
--\layout Standard
--
--The following code fragment illustrates an implementation of this approach.
-- In this case, we assume that a thread is launched to execute the function
--
--\family typewriter
--watch_drop
--\family default
--.
-- This code starts by building an event queue to log truncated operations
-- and a memory descriptor to truncate the incoming requests.
-- This example only captures
--\begin_inset Quotes eld
--\end_inset
--
--dropped
--\begin_inset Quotes erd
--\end_inset
--
-- requests for a single portal.
-- In a more realistic situation, the memory descriptor would be appended
-- to the match list for every portal.
-- We also assume that the thread is capable of keeping up with the
--\begin_inset Quotes eld
--\end_inset
--
--dropped
--\begin_inset Quotes erd
--\end_inset
--
-- requests.
-- If this is not the case, we could use a finite threshold on the memory
-- descriptor to capture the first few dropped requests.
--\layout LyX-Code
--
--
--\size small
--#include <stdio.h>
--\newline
--#include <stdlib.h>
--\newline
--#include <portals.h>
--\newline
--
--\newline
--#define DROP_SIZE 32 /* number of dropped requests to track */
--\newline
--
--\newline
--int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) {
--\newline
-- ptl_handle_eq_t drop_events;
--\newline
-- ptl_event_t event;
--\newline
-- ptl_handle_md_t drop_em;
--\newline
-- ptl_md_t drop_desc;
--\newline
-- ptl_process_id_t any_proc;
--\newline
-- ptl_handle_me_t match_any;
--\newline
--
--\newline
-- /* create the event queue */
--\newline
-- if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) {
--\newline
-- fprintf( stderr, "Couldn't create the event queue
--\backslash
--n" );
--\newline
-- exit( 1 );
--\newline
-- }
--\newline
--
--\newline
-- /* build a match entry */
--\newline
-- any_proc.nid = PTL_ID_ANY;
--\newline
-- any_proc.pid = PTL_ID_ANY;
--\newline
-- PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN,
--\newline
-- &match_any );
--\newline
--
--\newline
-- /* create the memory descriptor */
--\newline
-- drop_desc.start = NULL;
--\newline
-- drop_desc.length = 0;
--\newline
-- drop_desc.threshold = PTL_MD_THRESH_INF;
--\newline
-- drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE;
--\newline
-- drop_desc.user_ptr = NULL;
--\newline
-- drop_desc.eventq = drop_events;
--\newline
-- if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) {
--\newline
-- fprintf( stderr, "Couldn't create the memory descriptor
--\backslash
--n" );
--\newline
-- exit( 1 );
--\newline
-- }
--\newline
--
--\newline
-- /* watch for "dropped" requests */
--\newline
-- while( 1 ) {
--\newline
-- if( PtlEQWait( drop_events, &event ) != PTL_OK ) break;
--\newline
-- fprintf( stderr, "Dropped request from gid = %d, rid = %d
--\backslash
--n", event.initiator.gid,
-- event.initiator.rid );
--\newline
-- }
--\newline
--}
--\layout Section
--
--Message Transmission in MPI
--\begin_inset LatexCommand \label{sec:exmpi}
--
--\end_inset
--
--
--\layout Standard
--
--We conclude this section with a fairly extensive example that describes
-- an approach to implementing message transmission for MPI.
-- Like many MPI implementations, we distinguish two message transmission
-- protocols: a short message protocol and a long message protocol.
-- We use the constant
--\family typewriter
--MPI_LONG_LENGTH
--\family default
-- to determine the size of a long message.
--\layout Standard
--
--For small messages, the sender simply sends the message and presumes that
-- the message will be received (i.e., the receiver has allocated a memory region
-- to receive the message body).
-- For large messages, the sender also sends the message, but does not presume
-- that the message body will be saved.
-- Instead, the sender builds a memory descriptor for the message and enables
-- get operations on this descriptor.
-- If the target does not save the body of the message, it will record an
-- event for the put operation.
-- When the process later issues a matching MPI receive, it will perform a
-- get operation to retrieve the body of the message.
--\layout Standard
--
--To facilitate receive side matching based on the protocol, we use the most
-- significant bit in the match bits to indicate the protocol: 1 for long
-- messages and 0 for short messages.
--\layout Standard
--
--The following code presents a function that implements the send side of
-- the protocol.
-- The global variable
--\family typewriter
--EndGet
--\family default
-- is the last match entry attached to the Portal index used for posting long
-- messages.
-- This entry does not match any incoming requests (i.e., the memory descriptor
-- rejects all get operations) and is built during initialization of the MPI
-- library.
-- The other global variable,
--\family typewriter
--MPI_NI
--\family default
--, is a handle for the network interface used by the MPI implementation.
--\layout LyX-Code
--
--
--\size small
--extern ptl_handle_me_t EndGet;
--\newline
--extern ptl_handle_ni_t MPI_NI;
--\newline
--
--\newline
--void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq,
--\newline
-- ptl_process_id_t target, ptl_match_bits_t match )
--\newline
--{
--\newline
-- ptl_handle_md_t send_handle;
--\newline
-- ptl_md_t mem_desc;
--\newline
-- ptl_ack_req_t want_ack;
--\newline
--
--\newline
-- mem_desc.start = buf;
--\newline
-- mem_desc.length = len;
--\newline
-- mem_desc.threshold = 1;
--\newline
-- mem_desc.options = PTL_MD_GET_OP;
--\newline
-- mem_desc.user_ptr = data;
--\newline
-- mem_desc.eventq = eventq;
--\newline
--
--\newline
-- if( len >= MPI_LONG_LENGTH ) {
--\newline
-- ptl_handle_me_t me_handle;
--\newline
--
--\newline
-- /* add a match entry to the end of the get list */
--\newline
-- PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet,
-- &me_handle );
--\newline
-- PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL );
--\newline
--
--\newline
-- /* we want an ack for long messages */
--\newline
-- want_ack = PTL_ACK_REQ;
--\newline
--
--\newline
-- /* set the protocol bit to indicate that this is a long message
-- */
--\newline
-- match |= 1<<63;
--\newline
-- } else {
--\newline
-- /* we don't want an ack for short messages */
--\newline
-- want_ack = PTL_NOACK_REQ;
--\newline
--
--\newline
-- /* set the protocol bit to indicate that this is a short message
-- */
--\newline
-- match &= ~(1<<63);
--\newline
-- }
--\newline
--
--\newline
-- /* create a memory descriptor and send it */
--\newline
-- PtlMDBind( MPI_NI, mem_desc, &send_handle );
--\newline
-- PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match,
-- 0 );
--\newline
--}
--\layout Standard
--
--The
--\emph on
--MPIsend
--\emph default
-- function returns as soon as the message has been scheduled for transmission.
-- The event queue argument,
--\family typewriter
--eventq
--\family default
--, can be used to determine the disposition of the message.
-- Assuming that
--\family typewriter
--eventq
--\family default
-- is not
--\family typewriter
--PTL_EQ_NONE
--\family default
--, a
--\family typewriter
--PTL_EVENT_SENT
--\family default
-- event will be recorded for each message as the message is transmitted.
-- For small messages, this is the only event that will be recorded in
--\family typewriter
--eventq
--\family default
--.
-- In contrast, long messages include an explicit request for an acknowledgement.
-- If the
--\family typewriter
--target
--\family default
-- process has posted a matching receive, the acknowledgement will be sent
-- as the message is received.
-- If a matching receive has not been posted, the message will be discarded
-- and no acknowledgement will be sent.
-- When the
--\family typewriter
--target
--\family default
-- process later issues a matching receive, the receive will be translated
-- into a get operation and a
--\family typewriter
--PTL_EVENT_GET
--\family default
-- event will be recorded in
--\family typewriter
--eventq
--\family default
--.
--\layout Standard
--
--Figure\SpecialChar ~
--
--\begin_inset LatexCommand \ref{fig:mpi}
--
--\end_inset
--
-- illustrates the organization of the match list used for receiving MPI messages.
-- The initial entries (not shown in this figure) would be used to match the
-- MPI receives that have been preposted by the application.
-- The preposted receives are followed by a match entry,
--\emph on
--RcvMark
--\emph default
--, that marks the boundary between preposted receives and the memory descriptors
-- used for
--\begin_inset Quotes eld
--\end_inset
--
--unexpected
--\begin_inset Quotes erd
--\end_inset
--
-- messages.
-- The
--\emph on
--RcvMark
--\emph default
-- entry is followed by a small collection of match entries that match unexpected
--
--\begin_inset Quotes eld
--\end_inset
--
--short
--\begin_inset Quotes erd
--\end_inset
--
-- messages, i.e., messages that have a 0 in the most significant bit of their
-- match bits.
-- The memory descriptors associated with these match entries will append
-- the incoming message to the associated memory descriptor and record an
-- event in an event queue for unexpected messages.
-- The unexpected short message matching entries are followed by a match entry
-- that will match messages that were not matched by the preceding match entries,
-- i.e., the unexpected long messages.
-- The memory descriptor associated with this match entry truncates the message
-- body and records an event in the event queue for unexpected messages.
-- Note that all of the memory descriptors used for unexpected messages share
-- a common event queue.
-- This makes it possible to process the unexpected messages in the order
-- in which they arrived, regardless of whether they are short or long
-- messages.
--\layout Standard
--
--
--\begin_inset Float figure
--placement htbp
--wide false
--collapsed false
--
--\layout Standard
--\align center
--
--\begin_inset Graphics FormatVersion 1
-- filename mpi.eps
-- display color
-- size_type 0
-- rotateOrigin center
-- lyxsize_type 1
-- lyxwidth 389pt
-- lyxheight 284pt
--\end_inset
--
--
--\layout Caption
--
--Message Reception in MPI
--\begin_inset LatexCommand \label{fig:mpi}
--
--\end_inset
--
--
--\end_inset
--
--
--\layout Standard
--
--When the local MPI process posts an MPI receive, we must first search the
-- events in the unexpected message queue to see if a matching message has already
-- arrived.
-- If no matching message is found, a match entry for the receive is inserted
-- before the
--\emph on
--RcvMark
--\emph default
-- entry--after the match entries for all of the previously posted receives
-- and before the match entries for the unexpected messages.
-- This ensures that preposted receives are matched in the order that they
-- were posted (a requirement of MPI).
--
--\layout Standard
--
--While this strategy respects the temporal semantics of MPI, it introduces
-- a race condition: a matching message might arrive after the events in the
-- unexpected message queue have been searched, but before the match entry
-- for the receive has been inserted in the match list.
--
--\layout Standard
--
--To avoid this race condition we start by setting the
--\family typewriter
--threshold
--\family default
-- of the memory descriptor to 0, making the descriptor inactive.
-- We then insert the match entry into the match list and proceed to search
-- the events in the unexpected message queue.
-- A matching message that arrives as we are searching the unexpected message
-- queue will not be accepted by the memory descriptor and, if not matched
-- by an earlier match list element, will add an event to the unexpected message
-- queue.
-- After searching the events in the unexpected message queue, we update the
-- memory descriptor, setting the threshold to 1 to activate the memory descriptor.
-- This update is predicated by the condition that the unexpected message
-- queue is empty.
-- We repeat the process of searching the unexpected message queue until the
-- update succeeds.
--\layout Standard
--
--The following code fragment illustrates this approach.
-- Because events must be removed from the unexpected message queue to be
-- examined, this code fragment assumes the existence of a user managed event
-- list,
--\family typewriter
--Rcvd
--\family default
--, for the events that have already been removed from the unexpected message
-- queue.
-- In an effort to keep the example focused on the basic protocol, we have
-- omitted the code that would be needed to manage the memory descriptors
-- used for unexpected short messages.
-- In particular, we simply leave messages in these descriptors until they
-- are received by the application.
-- In a robust implementation, we would introduce code to ensure that short
-- unexpected messages are removed from these memory descriptors so that they
-- can be re-used.
--\layout LyX-Code
--
--
--\size small
--extern ptl_handle_eq_t UnexpQueue;
--\newline
--extern ptl_handle_me_t RcvMark;
--\newline
--extern ptl_handle_me_t ShortMatch;
--\newline
--
--\newline
--typedef struct event_list_tag {
--\newline
-- ptl_event_t event;
--\newline
-- struct event_list_tag* next;
--\newline
--} event_list;
--\newline
--
--\newline
--extern event_list Rcvd;
--\newline
--
--\newline
--void AppendRcvd( ptl_event_t event )
--\newline
--{
--\newline
-- /* append an event onto the Rcvd list */
--\newline
--}
--\newline
--
--\newline
--int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi
--ts_t match,
--\newline
-- ptl_match_bits_t ignore, ptl_event_t *event )
--\newline
--{
--\newline
-- /* Search the Rcvd event queue, looking for a message that matches the
-- requested message.
--\newline
-- * If one is found, remove the event from the Rcvd list and return it.
-- */
--\newline
--}
--\newline
--
--\newline
--typedef enum { RECEIVED, POSTED } receive_state;
--\newline
--
--\newline
--receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event,
-- ptl_md_t md_buf )
--\newline
--{
--\newline
-- ptl_md_t md_buf;
--\newline
-- ptl_handle_me_t me_handle;
--\newline
--
--\newline
-- if( event.rlength >= MPI_LONG_LENGTH ) {
--\newline
-- PtlMDBind( MPI_NI, md_buf, &md_handle );
--\newline
-- PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX,
-- md_handle );
--\newline
-- return POSTED;
--\newline
-- } else {
--\newline
-- /* copy the message */
--\newline
-- if( event.mlength < *length ) *length = event.mlength;
--\newline
-- memcpy( buf, (char*)event.md_desc.start+event.offset, *length );
--\newline
-- return RECEIVED;
--\newline
-- }
--\newline
--}
--\newline
--
--\newline
--receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle
--_eq_t eventq,
--\newline
-- ptl_process_id_t sender, ptl_match_bits_t match,
-- ptl_match_bits_t ignore )
--\newline
--{
--\newline
-- ptl_md_t md_buf;
--\newline
-- ptl_handle_md_t md_handle;
--\newline
-- ptl_handle_me_t me_handle;
--\newline
-- ptl_event_t event;
--\newline
--
--\newline
-- /* build a memory descriptor for the receive */
--\newline
-- md_buf.start = buf;
--\newline
-- md_buf.length = *len;
--\newline
-- md_buf.threshold = 0; /* temporarily disabled */
--\newline
-- md_buf.options = PTL_MD_PUT_OP;
--\newline
-- md_buf.user_ptr = MPI_data;
--\newline
-- md_buf.eventq = eventq;
--\newline
--
--\newline
-- /* see if we have already received the message */
--\newline
-- if( SearchRcvd(buf, len, sender, match, ignore, &event) )
--\newline
-- return CopyMsg( buf, len, event, md_buf );
--\newline
--
--\newline
-- /* create the match entry and attach the memory descriptor */
--\newline
-- PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark,
-- &me_handle);
--\newline
-- PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle );
--\newline
--
--\newline
-- md_buf.threshold = 1;
--\newline
-- do
--\newline
-- if( PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) {
--\newline
-- if( MPIMatch(event, match, ignore, sender) ) {
--\newline
-- return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset,
-- md_buf );
--\newline
-- } else {
--\newline
-- AppendRcvd( event );
--\newline
-- }
--\newline
-- }
--\newline
-- while( PtlMDUpdate(md_handle, NULL, &md_buf, UnexpQueue) == PTL_NOUPDATE
-- );
--\newline
-- return POSTED;
--\newline
--}
--\layout Chapter*
--
--Acknowledgments
--\layout Standard
--
--Several people have contributed to the philosophy, design, and implementation
-- of the Portals message passing architecture as it has evolved.
-- We acknowledge the following people for their contributions: Al Audette,
-- Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike
-- Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke,
-- Dave van Dresser, Lee Ward, and Stephen Wheat.
--
--\layout Standard
--
--
--\begin_inset LatexCommand \BibTeX[ieee]{portals3}
--
--\end_inset
--
--
--\the_end
+++ /dev/null
--#FIG 3.2
--Landscape
--Center
--Inches
--Letter
--100.00
--Single
---2
--1200 2
--6 1350 900 2175 1200
--4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001
--4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001
---6
--2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 2700 1275 2700 1725
--2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-- 0 0 1.00 60.00 120.00
-- 900 525 2700 1200
--2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
-- 0 300 1200 300 1200 2250 0 2250 0 300
--2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
-- 2400 300 3600 300 3600 2250 2400 2250 2400 300
--2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2
-- 0 0 1.00 60.00 120.00
-- 2699 1788 899 1938
--4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001
--4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001
--4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001
--4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001
--4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
--4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
+++ /dev/null
--config.h
--stamp-h
--stamp-h1
--stamp-h.in
--Makefile
--Makefile.in
+++ /dev/null
--SUBDIRS = linux portals
--
--EXTRA_DIST = cygwin-ioctl.h
+++ /dev/null
--/*
-- * linux/ioctl.h for Linux by H.H. Bergman.
-- *
-- * Defines the _IOC() family of macros used to build and to decode ioctl
-- * request numbers.  Any earlier definitions of _IO, _IOR, _IOW, _IOC,
-- * IOC_IN and IOC_OUT are discarded and replaced by the ones in this file.
-- *
-- * NOTE(review): _IOWR and IOC_INOUT are defined below without a matching
-- * #undef -- confirm that no previously included header defines them.
-- */
--
--#ifndef _ASMI386_IOCTL_H
--#define _ASMI386_IOCTL_H
--
--/* ioctl command encoding: 32 bits total, command in lower 16 bits,
-- * size of the parameter structure in the lower 14 bits of the
-- * upper 16 bits.
-- * Encoding the size of the parameter structure in the ioctl request
-- * is useful for catching programs compiled with old versions
-- * and to avoid overwriting user space outside the user buffer area.
-- * The highest 2 bits are reserved for indicating the ``access mode''.
-- * NOTE: This limits the max parameter size to 16kB -1 !
-- *
-- * Resulting layout, as given by the *BITS and *SHIFT macros below:
-- *   bits  0..7   nr    (command number)
-- *   bits  8..15  type
-- *   bits 16..29  size  (parameter structure size)
-- *   bits 30..31  dir   (direction / access mode)
-- */
--
--/*
-- * The following is for compatibility across the various Linux
-- * platforms. The i386 ioctl numbering scheme doesn't really enforce
-- * a type field. De facto, however, the top 8 bits of the lower 16
-- * bits are indeed used as a type field, so we might just as well make
-- * this explicit here. Please be sure to use the decoding macros
-- * below from now on.
-- */
--#undef _IO
--#undef _IOR
--#undef _IOW
--#undef _IOC
--#undef IOC_IN
--#undef IOC_OUT
--
--#define _IOC_NRBITS 8
--#define _IOC_TYPEBITS 8
--#define _IOC_SIZEBITS 14
--#define _IOC_DIRBITS 2
--
--#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
--#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
--#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
--#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
--
--#define _IOC_NRSHIFT 0
--#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
--#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
--#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
--
--/*
-- * Direction bits.
-- * _IOC_READ and _IOC_WRITE may be OR-ed together (see _IOWR below).
-- * _IOC() itself only shifts and ORs its arguments; it does not mask
-- * them, so an argument wider than its field spills into the next field.
-- */
--#define _IOC_NONE 0U
--#define _IOC_WRITE 1U
--#define _IOC_READ 2U
--
--#define _IOC(dir,type,nr,size) \
-- (((dir) << _IOC_DIRSHIFT) | \
-- ((type) << _IOC_TYPESHIFT) | \
-- ((nr) << _IOC_NRSHIFT) | \
-- ((size) << _IOC_SIZESHIFT))
--
--/* used to create numbers
-- * _IO encodes a size of 0; _IOR, _IOW and _IOWR encode sizeof(size),
-- * so their `size' argument is whatever sizeof accepts (normally the
-- * type of the parameter structure), not a byte count.
-- */
--#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
--#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
--#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
--#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
--
--/* used to decode ioctl numbers..
-- * Each macro shifts the requested field down to bit 0 and masks it to
-- * the field width.  Here `nr' is the whole ioctl request number.
-- */
--#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
--#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
--#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
--#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
--
--/* ...and for the drivers/sound files...
-- * These are the direction values and the size mask already shifted
-- * into their position within a request number.
-- */
--
--#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT)
--#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT)
--#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
--#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT)
--#define IOCSIZE_SHIFT (_IOC_SIZESHIFT)
--
--#endif /* _ASMI386_IOCTL_H */
+++ /dev/null
--linuxdir = $(includedir)/linux
--
--EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h \
-- portals_lib.h
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#ifndef _KP30_INCLUDED
--#define _KP30_INCLUDED
--
--#include <linux/libcfs.h>
--#define PORTAL_DEBUG
-
- #ifndef offsetof
- # define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb)))
- #endif
-
- #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
--
--#ifdef __KERNEL__
--# include <linux/vmalloc.h>
--# include <linux/time.h>
--# include <linux/slab.h>
--# include <linux/interrupt.h>
--# include <linux/highmem.h>
--# include <linux/module.h>
--# include <linux/version.h>
--# include <portals/p30.h>
--# include <linux/smp_lock.h>
--# include <asm/atomic.h>
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--#define schedule_work schedule_task
--#define prepare_work(wq,cb,cbdata) \
--do { \
-- INIT_TQUEUE((wq), 0, 0); \
-- PREPARE_TQUEUE((wq), (cb), (cbdata)); \
--} while (0)
--
--#define PageUptodate Page_Uptodate
--#define our_recalc_sigpending(current) recalc_sigpending(current)
--#define num_online_cpus() smp_num_cpus
--static inline void our_cond_resched(void)
--{
-- if (current->need_resched)
-- schedule ();
--}
--#define work_struct_t struct tq_struct
--
--#else
--
--#define prepare_work(wq,cb,cbdata) \
--do { \
-- INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \
--} while (0)
--#define wait_on_page wait_on_page_locked
--#define our_recalc_sigpending(current) recalc_sigpending()
--#define strtok(a,b) strpbrk(a, b)
--static inline void our_cond_resched(void)
--{
-- cond_resched();
--}
--#define work_struct_t struct work_struct
--
--#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
--
--#ifdef PORTAL_DEBUG
--extern void kportal_assertion_failed(char *expr, char *file, const char *func,
-- const int line);
--#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
-- __FUNCTION__, __LINE__))
--#define LASSERTF(cond, fmt...) \
-- do { \
-- if (unlikely(!(cond))) { \
-- portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\
-- __FUNCTION__,__LINE__, CDEBUG_STACK,\
-- "ASSERTION(" #cond ") failed:" fmt);\
-- LBUG(); \
-- } \
-- } while (0)
--
--#else
--#define LASSERT(e)
--#define LASSERTF(cond, fmt...) do { } while (0)
--#endif
--
--#ifdef CONFIG_SMP
--#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock))
--#else
--#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
--#endif
--
--#ifdef __arch_um__
--#define LBUG_WITH_LOC(file, func, line) \
--do { \
-- CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
-- portals_debug_dumplog(); \
-- portals_run_lbug_upcall(file, func, line); \
-- panic("LBUG"); \
--} while (0)
--#else
--#define LBUG_WITH_LOC(file, func, line) \
--do { \
-- CEMERG("LBUG\n"); \
-- CERROR("STACK: %s\n", portals_debug_dumpstack()); \
-- portals_debug_dumplog(); \
-- portals_run_lbug_upcall(file, func, line); \
-- set_task_state(current, TASK_UNINTERRUPTIBLE); \
-- schedule(); \
--} while (0)
--#endif /* __arch_um__ */
--
--#define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__)
--
--/*
-- * Memory
-- */
--#ifdef PORTAL_DEBUG
--extern atomic_t portal_kmemory;
--
--# define portal_kmem_inc(ptr, size) \
--do { \
-- atomic_add(size, &portal_kmemory); \
--} while (0)
--
--# define portal_kmem_dec(ptr, size) do { \
-- atomic_sub(size, &portal_kmemory); \
--} while (0)
--
--#else
--# define portal_kmem_inc(ptr, size) do {} while (0)
--# define portal_kmem_dec(ptr, size) do {} while (0)
--#endif /* PORTAL_DEBUG */
--
--#define PORTAL_VMALLOC_SIZE 16384
--
--#define PORTAL_ALLOC_GFP(ptr, size, mask) \
--do { \
-- LASSERT(!in_interrupt() || \
-- (size <= PORTAL_VMALLOC_SIZE && mask == GFP_ATOMIC)); \
-- if ((size) > PORTAL_VMALLOC_SIZE) \
-- (ptr) = vmalloc(size); \
-- else \
-- (ptr) = kmalloc((size), (mask)); \
-- if ((ptr) == NULL) { \
-- CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
-- #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
-- CERROR("PORTALS: %d total bytes allocated by portals\n", \
-- atomic_read(&portal_kmemory)); \
-- } else { \
-- portal_kmem_inc((ptr), (size)); \
-- memset((ptr), 0, (size)); \
-- } \
-- CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \
-- (int)(size), (ptr), atomic_read (&portal_kmemory)); \
--} while (0)
--
--#define PORTAL_ALLOC(ptr, size) \
-- PORTAL_ALLOC_GFP(ptr, size, GFP_NOFS)
--
--#define PORTAL_ALLOC_ATOMIC(ptr, size) \
-- PORTAL_ALLOC_GFP(ptr, size, GFP_ATOMIC)
--
--#define PORTAL_FREE(ptr, size) \
--do { \
-- int s = (size); \
-- if ((ptr) == NULL) { \
-- CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \
-- "%s:%d\n", s, __FILE__, __LINE__); \
-- break; \
-- } \
-- if (s > PORTAL_VMALLOC_SIZE) \
-- vfree(ptr); \
-- else \
-- kfree(ptr); \
-- portal_kmem_dec((ptr), s); \
-- CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
-- s, (ptr), atomic_read(&portal_kmemory)); \
--} while (0)
--
--/* ------------------------------------------------------------------- */
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--
--#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
--#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
--
--#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
--#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
--
--#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
--#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT
--#else
--
--#define PORTAL_SYMBOL_REGISTER(x)
--#define PORTAL_SYMBOL_UNREGISTER(x)
--
--#define PORTAL_SYMBOL_GET(x) symbol_get(x)
--#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
--
--#define PORTAL_MODULE_USE try_module_get(THIS_MODULE)
--#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE)
--
--#endif
--
--/******************************************************************************/
--
--#ifdef PORTALS_PROFILING
--#define prof_enum(FOO) PROF__##FOO
--enum {
-- prof_enum(our_recvmsg),
-- prof_enum(our_sendmsg),
-- prof_enum(socknal_recv),
-- prof_enum(lib_parse),
-- prof_enum(conn_list_walk),
-- prof_enum(memcpy),
-- prof_enum(lib_finalize),
-- prof_enum(pingcli_time),
-- prof_enum(gmnal_send),
-- prof_enum(gmnal_recv),
-- MAX_PROFS
--};
--
--struct prof_ent {
-- char *str;
-- /* hrmph. wrap-tastic. */
-- u32 starts;
-- u32 finishes;
-- cycles_t total_cycles;
-- cycles_t start;
-- cycles_t end;
--};
--
--extern struct prof_ent prof_ents[MAX_PROFS];
--
--#define PROF_START(FOO) \
-- do { \
-- struct prof_ent *pe = &prof_ents[PROF__##FOO]; \
-- pe->starts++; \
-- pe->start = get_cycles(); \
-- } while (0)
--
--#define PROF_FINISH(FOO) \
-- do { \
-- struct prof_ent *pe = &prof_ents[PROF__##FOO]; \
-- pe->finishes++; \
-- pe->end = get_cycles(); \
-- pe->total_cycles += (pe->end - pe->start); \
-- } while (0)
--#else /* !PORTALS_PROFILING */
--#define PROF_START(FOO) do {} while(0)
--#define PROF_FINISH(FOO) do {} while(0)
--#endif /* PORTALS_PROFILING */
--
--/* debug.c */
--extern spinlock_t stack_backtrace_lock;
--
--char *portals_debug_dumpstack(void);
--void portals_run_upcall(char **argv);
--void portals_run_lbug_upcall(char * file, const char *fn, const int line);
--void portals_debug_dumplog(void);
--int portals_debug_init(unsigned long bufsize);
--int portals_debug_cleanup(void);
--int portals_debug_clear_buffer(void);
--int portals_debug_mark_buffer(char *text);
--int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-- char *file, unsigned int size);
--__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
--#if (__GNUC__)
--/* Use the special GNU C __attribute__ hack to have the compiler check the
-- * printf style argument string against the actual argument count and
-- * types.
-- */
--#ifdef printf
--# warning printf has been defined as a macro...
--# undef printf
--#endif
--void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-- const int line, unsigned long stack,
-- char *format, ...)
-- __attribute__ ((format (printf, 7, 8)));
--#else
--void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-- const int line, unsigned long stack,
-- const char *format, ...);
--#endif /* __GNUC__ */
--void portals_debug_set_level(unsigned int debug_level);
--
--# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
--# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
--# define time(a) CURRENT_TIME
--
--extern void kportal_daemonize (char *name);
--extern void kportal_blockallsigs (void);
--
--#else /* !__KERNEL__ */
--# include <stdio.h>
--# include <stdlib.h>
--#ifndef __CYGWIN__
--# include <stdint.h>
--#else
--# include <cygwin-ioctl.h>
--#endif
--# include <unistd.h>
--# include <time.h>
--# include <limits.h>
--# include <asm/types.h>
--# ifndef DEBUG_SUBSYSTEM
--# define DEBUG_SUBSYSTEM S_UNDEFINED
--# endif
--# ifdef PORTAL_DEBUG
--# undef NDEBUG
--# include <assert.h>
--# define LASSERT(e) assert(e)
- # define LASSERTF(cond, args...) assert(cond)
-# define LASSERTF(cond, args...) \
-do { \
- if (!(cond)) \
- CERROR(args); \
- assert(cond); \
-} while (0)
--# else
--# define LASSERT(e)
--# define LASSERTF(cond, args...) do { } while (0)
--# endif
--# define printk(format, args...) printf (format, ## args)
--# define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
--# define PORTAL_FREE(a, b) do { free(a); } while (0);
--void portals_debug_dumplog(void);
--# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-- printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \
-- (subsys), (mask), (long)time(0), file, fn, line, \
-- getpid() , stack, ## a);
-
- #undef CWARN
- #undef CERROR
- #define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
- #define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
--#endif
--
--/* support decl needed both by kernel and liblustre */
--char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
-char *portals_id2str(int nal, ptl_process_id_t nid, char *str);
--
--#ifndef CURRENT_TIME
--# define CURRENT_TIME time(0)
--#endif
--
--/******************************************************************************/
--/* Light-weight trace
-- * Support for temporary event tracing with minimal Heisenberg effect. */
--#define LWT_SUPPORT 0
--
- #define LWT_MEMORY (64<<20)
- #define LWT_MAX_CPUS 4
-#define LWT_MEMORY (16<<20)
--
-#if !KLWT_SUPPORT
-/* kernel hasn't defined this? */
--typedef struct {
- cycles_t lwte_when;
- long long lwte_when;
-- char *lwte_where;
-- void *lwte_task;
-- long lwte_p1;
-- long lwte_p2;
-- long lwte_p3;
-- long lwte_p4;
- #if BITS_PER_LONG > 32
-# if BITS_PER_LONG > 32
-- long lwte_pad;
- #endif
-# endif
--} lwt_event_t;
-#endif /* !KLWT_SUPPORT */
--
--#if LWT_SUPPORT
- #ifdef __KERNEL__
- #define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
-# ifdef __KERNEL__
-# if !KLWT_SUPPORT
--
--typedef struct _lwt_page {
-- struct list_head lwtp_list;
-- struct page *lwtp_page;
-- lwt_event_t *lwtp_events;
--} lwt_page_t;
--
--typedef struct {
-- int lwtc_current_index;
-- lwt_page_t *lwtc_current_page;
--} lwt_cpu_t;
--
--extern int lwt_enabled;
--extern lwt_cpu_t lwt_cpus[];
-
- extern int lwt_init (void);
- extern void lwt_fini (void);
- extern int lwt_lookup_string (int *size, char *knlptr,
- char *usrptr, int usrsize);
- extern int lwt_control (int enable, int clear);
- extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
- void *user_ptr, int user_size);
--
--/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
-- * This stuff is meant for finding specific problems; it never stays in
-- * production code... */
--
--#define LWTSTR(n) #n
--#define LWTWHERE(f,l) f ":" LWTSTR(l)
-#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
--
--#define LWT_EVENT(p1, p2, p3, p4) \
--do { \
-- unsigned long flags; \
-- lwt_cpu_t *cpu; \
-- lwt_page_t *p; \
-- lwt_event_t *e; \
- \
- local_irq_save (flags); \
-- \
-- if (lwt_enabled) { \
- local_irq_save (flags); \
- \
-- cpu = &lwt_cpus[smp_processor_id()]; \
-- p = cpu->lwtc_current_page; \
-- e = &p->lwtp_events[cpu->lwtc_current_index++]; \
-- \
-- if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \
-- cpu->lwtc_current_page = \
-- list_entry (p->lwtp_list.next, \
-- lwt_page_t, lwtp_list); \
-- cpu->lwtc_current_index = 0; \
-- } \
-- \
-- e->lwte_when = get_cycles(); \
-- e->lwte_where = LWTWHERE(__FILE__,__LINE__); \
-- e->lwte_task = current; \
-- e->lwte_p1 = (long)(p1); \
-- e->lwte_p2 = (long)(p2); \
-- e->lwte_p3 = (long)(p3); \
-- e->lwte_p4 = (long)(p4); \
- } \
-- \
- local_irq_restore (flags); \
- local_irq_restore (flags); \
- } \
--} while (0)
- #else /* __KERNEL__ */
- #define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
- #endif /* __KERNEL__ */
-
-#endif /* !KLWT_SUPPORT */
-
-extern int lwt_init (void);
-extern void lwt_fini (void);
-extern int lwt_lookup_string (int *size, char *knlptr,
- char *usrptr, int usrsize);
-extern int lwt_control (int enable, int clear);
-extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
- void *user_ptr, int user_size);
-# else /* __KERNEL__ */
-# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
-# endif /* __KERNEL__ */
--#endif /* LWT_SUPPORT */
--
--struct portals_device_userstate
--{
-- int pdu_memhog_pages;
-- struct page *pdu_memhog_root_page;
--};
--
--#include <linux/portals_lib.h>
--
--/*
-- * USER LEVEL STUFF BELOW
-- */
--
--#define PORTAL_IOCTL_VERSION 0x00010007
--#define PING_SYNC 0
--#define PING_ASYNC 1
--
--struct portal_ioctl_hdr {
-- __u32 ioc_len;
-- __u32 ioc_version;
--};
--
--struct portals_debug_ioctl_data
--{
-- struct portal_ioctl_hdr hdr;
-- unsigned int subs;
-- unsigned int debug;
--};
--
--#define PORTAL_IOC_INIT(data) \
--do { \
-- memset(&data, 0, sizeof(data)); \
-- data.ioc_version = PORTAL_IOCTL_VERSION; \
-- data.ioc_len = sizeof(data); \
--} while (0)
--
--/* FIXME check conflict with lustre_lib.h */
--#define PTL_IOC_DEBUG_MASK _IOWR('f', 250, long)
--
--static inline int portal_ioctl_packlen(struct portal_ioctl_data *data)
--{
-- int len = sizeof(*data);
-- len += size_round(data->ioc_inllen1);
-- len += size_round(data->ioc_inllen2);
-- return len;
--}
--
--static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data)
--{
-- if (data->ioc_len > (1<<30)) {
-- CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n");
-- return 1;
-- }
-- if (data->ioc_inllen1 > (1<<30)) {
-- CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n");
-- return 1;
-- }
-- if (data->ioc_inllen2 > (1<<30)) {
-- CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n");
-- return 1;
-- }
-- if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-- CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n");
-- return 1;
-- }
-- if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-- CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n");
-- return 1;
-- }
-- if (data->ioc_pbuf1 && !data->ioc_plen1) {
-- CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n");
-- return 1;
-- }
-- if (data->ioc_pbuf2 && !data->ioc_plen2) {
-- CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n");
-- return 1;
-- }
-- if (data->ioc_plen1 && !data->ioc_pbuf1) {
-- CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n");
-- return 1;
-- }
-- if (data->ioc_plen2 && !data->ioc_pbuf2) {
-- CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n");
-- return 1;
-- }
-- if (portal_ioctl_packlen(data) != data->ioc_len ) {
-- CERROR ("PORTALS ioctl: packlen != ioc_len\n");
-- return 1;
-- }
-- if (data->ioc_inllen1 &&
-- data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {
-- CERROR ("PORTALS ioctl: inlbuf1 not 0 terminated\n");
-- return 1;
-- }
-- if (data->ioc_inllen2 &&
-- data->ioc_bulk[size_round(data->ioc_inllen1) +
-- data->ioc_inllen2 - 1] != '\0') {
-- CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n");
-- return 1;
-- }
-- return 0;
--}
--
--#ifndef __KERNEL__
--static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf,
-- int max)
--{
-- char *ptr;
-- struct portal_ioctl_data *overlay;
-- data->ioc_len = portal_ioctl_packlen(data);
-- data->ioc_version = PORTAL_IOCTL_VERSION;
--
-- if (*pbuf && portal_ioctl_packlen(data) > max)
-- return 1;
-- if (*pbuf == NULL) {
-- *pbuf = malloc(data->ioc_len);
-- }
-- if (!*pbuf)
-- return 1;
-- overlay = (struct portal_ioctl_data *)*pbuf;
-- memcpy(*pbuf, data, sizeof(*data));
--
-- ptr = overlay->ioc_bulk;
-- if (data->ioc_inlbuf1)
-- LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
-- if (data->ioc_inlbuf2)
-- LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
-- if (portal_ioctl_is_invalid(overlay))
-- return 1;
--
-- return 0;
--}
--#else
--#include <asm/uaccess.h>
--
--/* buffer MUST be at least the size of portal_ioctl_hdr */
--static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
--{
-- struct portal_ioctl_hdr *hdr;
-- struct portal_ioctl_data *data;
-- int err;
-- ENTRY;
--
-- hdr = (struct portal_ioctl_hdr *)buf;
-- data = (struct portal_ioctl_data *)buf;
--
-- err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
-- if ( err ) {
-- EXIT;
-- return err;
-- }
--
-- if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
-- CERROR ("PORTALS: version mismatch kernel vs application\n");
-- return -EINVAL;
-- }
--
-- if (hdr->ioc_len + buf >= end) {
-- CERROR ("PORTALS: user buffer exceeds kernel buffer\n");
-- return -EINVAL;
-- }
--
--
-- if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) {
-- CERROR ("PORTALS: user buffer too small for ioctl\n");
-- return -EINVAL;
-- }
--
-- err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
-- if ( err ) {
-- EXIT;
-- return err;
-- }
--
-- if (portal_ioctl_is_invalid(data)) {
-- CERROR ("PORTALS: ioctl not correctly formatted\n");
-- return -EINVAL;
-- }
--
-- if (data->ioc_inllen1) {
-- data->ioc_inlbuf1 = &data->ioc_bulk[0];
-- }
--
-- if (data->ioc_inllen2) {
-- data->ioc_inlbuf2 = &data->ioc_bulk[0] +
-- size_round(data->ioc_inllen1);
-- }
--
-- EXIT;
-- return 0;
--}
--#endif
--
--/* ioctls for manipulating snapshots 30- */
--#define IOC_PORTAL_TYPE 'e'
--#define IOC_PORTAL_MIN_NR 30
--
--#define IOC_PORTAL_PING _IOWR('e', 30, long)
--
--#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long)
--#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long)
--#define IOC_PORTAL_PANIC _IOWR('e', 34, long)
--#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, long)
--#define IOC_PORTAL_GET_NID _IOWR('e', 36, long)
--#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, long)
--
--#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long)
--#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long)
--#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long)
--#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long)
--#define IOC_PORTAL_MAX_NR 42
--
--enum {
-- QSWNAL = 1,
-- SOCKNAL = 2,
-- GMNAL = 3,
-- /* 4 unused */
-- TCPNAL = 5,
- SCIMACNAL = 6,
- ROUTER = 7,
- IBNAL = 8,
- ROUTER = 6,
- OPENIBNAL = 7,
-- NAL_ENUM_END_MARKER
--};
-
- #ifdef __KERNEL__
- extern ptl_handle_ni_t kqswnal_ni;
- extern ptl_handle_ni_t ksocknal_ni;
- extern ptl_handle_ni_t kgmnal_ni;
- extern ptl_handle_ni_t kibnal_ni;
- extern ptl_handle_ni_t kscimacnal_ni;
- #endif
--
- #define PTL_NALFMT_SIZE 26 /* %u:%u.%u.%u.%u (10+4+4+4+3+1) */
-#define PTL_NALFMT_SIZE 30 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+4+1) */
--
--#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1)
--
--#define NAL_CMD_REGISTER_PEER_FD 100
--#define NAL_CMD_CLOSE_CONNECTION 101
--#define NAL_CMD_REGISTER_MYNID 102
--#define NAL_CMD_PUSH_CONNECTION 103
--#define NAL_CMD_GET_CONN 104
- #define NAL_CMD_DEL_AUTOCONN 105
- #define NAL_CMD_ADD_AUTOCONN 106
- #define NAL_CMD_GET_AUTOCONN 107
-#define NAL_CMD_DEL_PEER 105
-#define NAL_CMD_ADD_PEER 106
-#define NAL_CMD_GET_PEER 107
--#define NAL_CMD_GET_TXDESC 108
--#define NAL_CMD_ADD_ROUTE 109
--#define NAL_CMD_DEL_ROUTE 110
--#define NAL_CMD_GET_ROUTE 111
--#define NAL_CMD_NOTIFY_ROUTER 112
-#define NAL_CMD_ADD_INTERFACE 113
-#define NAL_CMD_DEL_INTERFACE 114
-#define NAL_CMD_GET_INTERFACE 115
-
--
--enum {
-- DEBUG_DAEMON_START = 1,
-- DEBUG_DAEMON_STOP = 2,
-- DEBUG_DAEMON_PAUSE = 3,
-- DEBUG_DAEMON_CONTINUE = 4,
- };
-
- /* XXX remove to lustre ASAP */
- struct lustre_peer {
- ptl_nid_t peer_nid;
- ptl_handle_ni_t peer_ni;
--};
--
- /* module.c */
- typedef int (*nal_cmd_handler_t)(struct portals_cfg *, void * private);
- int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private);
- int kportal_nal_unregister(int nal);
--
--enum cfg_record_type {
-- PORTALS_CFG_TYPE = 1,
-- LUSTRE_CFG_TYPE = 123,
--};
--
--typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
- int kportal_nal_cmd(struct portals_cfg *);
-
- ptl_handle_ni_t *kportal_get_ni (int nal);
- void kportal_put_ni (int nal);
--
--#ifdef __CYGWIN__
--# ifndef BITS_PER_LONG
--# if (~0UL) == 0xffffffffUL
--# define BITS_PER_LONG 32
--# else
--# define BITS_PER_LONG 64
--# endif
--# endif
--#endif
--
--#if BITS_PER_LONG > 32
--# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
--# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
--# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
--#else
--# define LI_POISON ((int)0x5a5a5a5a)
--# define LL_POISON ((long)0x5a5a5a5a)
--# define LP_POISON ((void *)(long)0x5a5a5a5a)
--#endif
--
--#if defined(__x86_64__)
--# define LPU64 "%Lu"
--# define LPD64 "%Ld"
--# define LPX64 "%#Lx"
--# define LPSZ "%lu"
--# define LPSSZ "%ld"
--#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
--# define LPU64 "%Lu"
--# define LPD64 "%Ld"
--# define LPX64 "%#Lx"
--# define LPSZ "%u"
--# define LPSSZ "%d"
--#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
--# define LPU64 "%lu"
--# define LPD64 "%ld"
--# define LPX64 "%#lx"
--# define LPSZ "%lu"
--# define LPSSZ "%ld"
--#endif
--#ifndef LPU64
--# error "No word size defined"
--#endif
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * * vim:expandtab:shiftwidth=8:tabstop=8:
- * */
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
--#ifndef _KPR_H
--#define _KPR_H
--
- # include <portals/lib-nal.h> /* for ptl_hdr_t */
-# include <portals/lib-types.h> /* for ptl_hdr_t */
--
--/******************************************************************************/
--/* Kernel Portals Router interface */
--
--typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
--
--/* space for routing targets to stash "stuff" in a forwarded packet */
--typedef union {
-- long long _alignment;
-- void *_space[16]; /* scale with CPU arch */
--} kprfd_scratch_t;
--
--/* Kernel Portals Routing Forwarded message Descriptor */
--typedef struct {
-- struct list_head kprfd_list; /* stash in queues (routing target can use) */
-- ptl_nid_t kprfd_target_nid; /* final destination NID */
-- ptl_nid_t kprfd_gateway_nid; /* gateway NID */
-- ptl_hdr_t *kprfd_hdr; /* header in wire byte order */
-- int kprfd_nob; /* # payload bytes */
-- int kprfd_niov; /* # payload frags */
-- ptl_kiov_t *kprfd_kiov; /* payload fragments */
-- void *kprfd_router_arg; /* originating NAL's router arg */
-- kpr_fwd_callback_t kprfd_callback; /* completion callback */
-- void *kprfd_callback_arg; /* completion callback arg */
-- kprfd_scratch_t kprfd_scratch; /* scratchpad for routing targets */
--} kpr_fwd_desc_t;
--
--typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
--typedef void (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
--
--/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
--typedef const struct {
-- int kprni_nalid; /* NAL's id */
-- void *kprni_arg; /* Arg to pass when calling into NAL */
-- kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */
-- kpr_notify_t kprni_notify; /* NAL's notification entrypoint */
--} kpr_nal_interface_t;
--
--/* Router's routing interface (Kernel Portals Routing Router Interface) */
--typedef const struct {
-- /* register the calling NAL with the router and get back the handle for
-- * subsequent calls */
-- int (*kprri_register) (kpr_nal_interface_t *nal_interface,
-- void **router_arg);
--
-- /* ask the router to find a gateway that forwards to 'nid' and is a
-- * peer of the calling NAL; assume caller will send 'nob' bytes of
-- * payload there */
-- int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
-- ptl_nid_t *gateway_nid);
--
-- /* hand a packet over to the router for forwarding */
-- kpr_fwd_t kprri_fwd_start;
--
-- /* hand a packet back to the router for completion */
-- void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
-- int error);
--
-- /* notify the router about peer state */
-- void (*kprri_notify) (void *router_arg, ptl_nid_t peer,
-- int alive, time_t when);
--
-- /* the calling NAL is shutting down */
-- void (*kprri_shutdown) (void *router_arg);
--
-- /* deregister the calling NAL with the router */
-- void (*kprri_deregister) (void *router_arg);
--
--} kpr_router_interface_t;
--
--/* Convenient struct for NAL to stash router interface/args */
--typedef struct {
-- kpr_router_interface_t *kpr_interface;
-- void *kpr_arg;
--} kpr_router_t;
-
- /* Router's control interface (Kernel Portals Routing Control Interface) */
- typedef const struct {
- int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- int (*kprci_del_route)(int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- int (*kprci_get_route)(int index, int *gateway_nal,
- ptl_nid_t *gateway,
- ptl_nid_t *lo_nid, ptl_nid_t *hi_nid,
- int *alive);
- int (*kprci_notify)(int gateway_nal, ptl_nid_t gateway_nid,
- int alive, time_t when);
- } kpr_control_interface_t;
--
- extern kpr_control_interface_t kpr_control_interface;
--extern kpr_router_interface_t kpr_router_interface;
--
--static inline int
--kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
--{
-- int rc;
--
-- router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
-- if (router->kpr_interface == NULL)
-- return (-ENOENT);
--
-- rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
-- if (rc != 0)
-- router->kpr_interface = NULL;
--
-- PORTAL_SYMBOL_PUT (kpr_router_interface);
-- return (rc);
--}
--
--static inline int
--kpr_routing (kpr_router_t *router)
--{
-- return (router->kpr_interface != NULL);
--}
--
--static inline int
--kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
--{
-- if (!kpr_routing (router))
-- return (-ENETUNREACH);
--
-- return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
-- gateway_nid));
--}
--
--static inline void
--kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
-- int nob, int niov, ptl_kiov_t *kiov,
-- kpr_fwd_callback_t callback, void *callback_arg)
--{
-- fwd->kprfd_target_nid = nid;
-- fwd->kprfd_gateway_nid = nid;
-- fwd->kprfd_hdr = hdr;
-- fwd->kprfd_nob = nob;
-- fwd->kprfd_niov = niov;
-- fwd->kprfd_kiov = kiov;
-- fwd->kprfd_callback = callback;
-- fwd->kprfd_callback_arg = callback_arg;
--}
--
--static inline void
--kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
--{
-- if (!kpr_routing (router))
-- fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
-- else
-- router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
--}
--
--static inline void
--kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
--{
-- LASSERT (kpr_routing (router));
-- router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
--}
--
--static inline void
--kpr_notify (kpr_router_t *router,
-- ptl_nid_t peer, int alive, time_t when)
--{
-- if (!kpr_routing (router))
-- return;
--
-- router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
--}
--
--static inline void
--kpr_shutdown (kpr_router_t *router)
--{
-- if (kpr_routing (router))
-- router->kpr_interface->kprri_shutdown (router->kpr_arg);
--}
--
--static inline void
--kpr_deregister (kpr_router_t *router)
--{
-- if (!kpr_routing (router))
-- return;
-- router->kpr_interface->kprri_deregister (router->kpr_arg);
-- router->kpr_interface = NULL;
--}
--
--#endif /* _KPR_H */
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#ifndef _LIBCFS_H
--#define _LIBCFS_H
-
-#include <asm/types.h>
-
-#ifdef __KERNEL__
-# include <linux/time.h>
-# include <asm/timex.h>
-#else
-# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday(tv, NULL);
-typedef unsigned long long cycles_t;
-#endif
--
--#define PORTAL_DEBUG
--
--#ifndef offsetof
- # define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb)))
-# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
--#endif
--
--#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-
-#ifndef __KERNEL__
-/* Userpace byte flipping */
-# include <endian.h>
-# include <byteswap.h>
-# define __swab16(x) bswap_16(x)
-# define __swab32(x) bswap_32(x)
-# define __swab64(x) bswap_64(x)
-# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
-# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
-# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-# define le16_to_cpu(x) (x)
-# define cpu_to_le16(x) (x)
-# define le32_to_cpu(x) (x)
-# define cpu_to_le32(x) (x)
-# define le64_to_cpu(x) (x)
-# define cpu_to_le64(x) (x)
-# else
-# if __BYTE_ORDER == __BIG_ENDIAN
-# define le16_to_cpu(x) bswap_16(x)
-# define cpu_to_le16(x) bswap_16(x)
-# define le32_to_cpu(x) bswap_32(x)
-# define cpu_to_le32(x) bswap_32(x)
-# define le64_to_cpu(x) bswap_64(x)
-# define cpu_to_le64(x) bswap_64(x)
-# else
-# error "Unknown byte order"
-# endif /* __BIG_ENDIAN */
-# endif /* __LITTLE_ENDIAN */
-#endif /* ! __KERNEL__ */
--
--/*
-- * Debugging
-- */
--extern unsigned int portal_subsystem_debug;
--extern unsigned int portal_stack;
--extern unsigned int portal_debug;
--extern unsigned int portal_printk;
--
--#include <asm/types.h>
--struct ptldebug_header {
-- __u32 ph_len;
-- __u32 ph_flags;
-- __u32 ph_subsys;
-- __u32 ph_mask;
-- __u32 ph_cpu_id;
-- __u32 ph_sec;
-- __u64 ph_usec;
-- __u32 ph_stack;
-- __u32 ph_pid;
-- __u32 ph_extern_pid;
-- __u32 ph_line_num;
--} __attribute__((packed));
--
--#define PH_FLAG_FIRST_RECORD 1
--
--/* Debugging subsystems (32 bits, non-overlapping) */
--#define S_UNDEFINED 0x00000001
--#define S_MDC 0x00000002
--#define S_MDS 0x00000004
--#define S_OSC 0x00000008
--#define S_OST 0x00000010
--#define S_CLASS 0x00000020
--#define S_LOG 0x00000040
--#define S_LLITE 0x00000080
--#define S_RPC 0x00000100
--#define S_MGMT 0x00000200
--#define S_PORTALS 0x00000400
--#define S_SOCKNAL 0x00000800
--#define S_QSWNAL 0x00001000
--#define S_PINGER 0x00002000
--#define S_FILTER 0x00004000
--#define S_PTLBD 0x00008000
--#define S_ECHO 0x00010000
--#define S_LDLM 0x00020000
--#define S_LOV 0x00040000
--#define S_GMNAL 0x00080000
--#define S_PTLROUTER 0x00100000
--#define S_COBD 0x00200000
- #define S_IBNAL 0x00400000
-#define S_OPENIBNAL 0x00400000
-#define S_SM 0x00800000
-#define S_ASOBD 0x01000000
-#define S_LMV 0x02000000
-#define S_CMOBD 0x04000000
--
--/* If you change these values, please keep portals/utils/debug.c
-- * up to date! */
--
--/* Debugging masks (32 bits, non-overlapping) */
--#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */
--#define D_INODE 0x00000002
--#define D_SUPER 0x00000004
--#define D_EXT2 0x00000008 /* anything from ext2_debug */
--#define D_MALLOC 0x00000010 /* print malloc, free information */
--#define D_CACHE 0x00000020 /* cache-related items */
--#define D_INFO 0x00000040 /* general information */
--#define D_IOCTL 0x00000080 /* ioctl related information */
--#define D_BLOCKS 0x00000100 /* ext2 block allocation */
--#define D_NET 0x00000200 /* network communications */
--#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
--#define D_BUFFS 0x00000800
--#define D_OTHER 0x00001000
--#define D_DENTRY 0x00002000
--#define D_PORTALS 0x00004000 /* ENTRY/EXIT markers */
--#define D_PAGE 0x00008000 /* bulk page handling */
--#define D_DLMTRACE 0x00010000
--#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
--#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
--#define D_HA 0x00080000 /* recovery and failover */
--#define D_RPCTRACE 0x00100000 /* for distributed debugging */
--#define D_VFSTRACE 0x00200000
--#define D_READA 0x00400000 /* read-ahead */
-
-#define D_MMAP 0x00800000
-#define D_CONFIG 0x01000000
--#ifdef __KERNEL__
--# include <linux/sched.h> /* THREAD_SIZE */
--#else
--# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
--# define THREAD_SIZE 8192
--# endif
--#endif
--
--#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
--
--#ifdef __KERNEL__
--# ifdef __ia64__
--# define CDEBUG_STACK (THREAD_SIZE - \
-- ((unsigned long)__builtin_dwarf_cfa() & \
-- (THREAD_SIZE - 1)))
--# else
--# define CDEBUG_STACK (THREAD_SIZE - \
-- ((unsigned long)__builtin_frame_address(0) & \
-- (THREAD_SIZE - 1)))
- # endif
-# endif /* __ia64__ */
--
--#define CHECK_STACK(stack) \
-- do { \
-- if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
-- portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
-- __FILE__, __FUNCTION__, __LINE__, \
-- (stack),"maximum lustre stack %u\n",\
-- portal_stack = (stack)); \
-- /*panic("LBUG");*/ \
-- } \
-- } while (0)
- #else /* __KERNEL__ */
-#else /* !__KERNEL__ */
--#define CHECK_STACK(stack) do { } while(0)
--#define CDEBUG_STACK (0L)
--#endif /* __KERNEL__ */
--
--#if 1
--#define CDEBUG(mask, format, a...) \
--do { \
-- CHECK_STACK(CDEBUG_STACK); \
-- if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
-- (portal_debug & (mask) && \
-- portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
-- portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
-- __FILE__, __FUNCTION__, __LINE__, \
-- CDEBUG_STACK, format, ## a); \
- } while (0)
-
- #define CDEBUG_MAX_LIMIT 600
- #define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...) \
- do { \
- static unsigned long cdebug_next; \
- static int cdebug_count, cdebug_delay = 1; \
- \
- CHECK_STACK(CDEBUG_STACK); \
- if (time_after(jiffies, cdebug_next)) { \
- portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__, \
- __FUNCTION__, __LINE__, CDEBUG_STACK, \
- cdebug_format, ## a); \
- if (cdebug_count) { \
- portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, \
- __FILE__, __FUNCTION__, __LINE__, \
- 0, cdebug_format, ## a); \
- cdebug_count = 0; \
- } \
- if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\
- cdebug_delay = cdebug_delay > 8 ? cdebug_delay/8 : 1; \
- else \
- cdebug_delay = cdebug_delay*2 >= CDEBUG_MAX_LIMIT*HZ? \
- CDEBUG_MAX_LIMIT * HZ : cdebug_delay*2; \
- cdebug_next = jiffies + cdebug_delay; \
- } else { \
- portals_debug_msg(DEBUG_SUBSYSTEM, \
- portal_debug & ~(D_EMERG|D_ERROR|D_WARNING),\
- __FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK, cdebug_format, ## a); \
- cdebug_count++; \
- } \
--} while (0)
--
- #define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a)
- #define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a)
-#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
--#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
--
--#define GOTO(label, rc) \
--do { \
-- long GOTO__ret = (long)(rc); \
-- CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
-- #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
-- (signed long)GOTO__ret); \
-- goto label; \
--} while (0)
--
--#define RETURN(rc) \
--do { \
-- typeof(rc) RETURN__ret = (rc); \
-- CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \
-- (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
-- return RETURN__ret; \
--} while (0)
--
--#define ENTRY \
--do { \
-- CDEBUG(D_TRACE, "Process entered\n"); \
--} while (0)
--
--#define EXIT \
--do { \
-- CDEBUG(D_TRACE, "Process leaving\n"); \
--} while(0)
--#else
--#define CDEBUG(mask, format, a...) do { } while (0)
- #define CWARN(format, a...) do { } while (0)
- #define CERROR(format, a...) printk("<3>" format, ## a)
- #define CEMERG(format, a...) printk("<0>" format, ## a)
-#define CWARN(format, a...) printk(KERN_WARNING format, ## a)
-#define CERROR(format, a...) printk(KERN_ERR format, ## a)
-#define CEMERG(format, a...) printk(KERN_EMERG format, ## a)
--#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
--#define RETURN(rc) return (rc)
--#define ENTRY do { } while (0)
--#define EXIT do { } while (0)
--#endif
-
-/* initial pid */
-# if CRAY_PORTALS
-/*
- *
- * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this is too
- * big.
- *
- * 2) the implementation of ernal in cray portals further restricts the pid space
- * that may be used to 0 <= pid <= 255 (an 8 bit value). Returns an error at nal
- * init time for any pid outside this range. Other nals in cray portals don't have
- * this restriction.
- * */
-#define LUSTRE_PTL_PID 9
-# else
-#define LUSTRE_PTL_PID 12345
-# endif
-
-#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID
--
--#define PORTALS_CFG_VERSION 0x00010001;
--
--struct portals_cfg {
-- __u32 pcfg_version;
-- __u32 pcfg_command;
--
-- __u32 pcfg_nal;
-- __u32 pcfg_flags;
--
-- __u32 pcfg_gw_nal;
-- __u64 pcfg_nid;
-- __u64 pcfg_nid2;
-- __u64 pcfg_nid3;
-- __u32 pcfg_id;
-- __u32 pcfg_misc;
-- __u32 pcfg_fd;
-- __u32 pcfg_count;
-- __u32 pcfg_size;
-- __u32 pcfg_wait;
--
-- __u32 pcfg_plen1; /* buffers in userspace */
-- char *pcfg_pbuf1;
-- __u32 pcfg_plen2; /* buffers in userspace */
-- char *pcfg_pbuf2;
--};
--
--#define PCFG_INIT(pcfg, cmd) \
--do { \
-- memset(&pcfg, 0, sizeof(pcfg)); \
-- pcfg.pcfg_version = PORTALS_CFG_VERSION; \
-- pcfg.pcfg_command = (cmd); \
-- \
--} while (0)
-
-typedef int (nal_cmd_handler_fn)(struct portals_cfg *, void *);
-int libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *arg);
-int libcfs_nal_cmd(struct portals_cfg *pcfg);
-void libcfs_nal_cmd_unregister(int nal);
--
--struct portal_ioctl_data {
-- __u32 ioc_len;
-- __u32 ioc_version;
-- __u64 ioc_nid;
-- __u64 ioc_nid2;
-- __u64 ioc_nid3;
-- __u32 ioc_count;
-- __u32 ioc_nal;
-- __u32 ioc_nal_cmd;
-- __u32 ioc_fd;
-- __u32 ioc_id;
--
-- __u32 ioc_flags;
-- __u32 ioc_size;
--
-- __u32 ioc_wait;
-- __u32 ioc_timeout;
-- __u32 ioc_misc;
--
-- __u32 ioc_inllen1;
-- char *ioc_inlbuf1;
-- __u32 ioc_inllen2;
-- char *ioc_inlbuf2;
--
-- __u32 ioc_plen1; /* buffers in userspace */
-- char *ioc_pbuf1;
-- __u32 ioc_plen2; /* buffers in userspace */
-- char *ioc_pbuf2;
--
-- char ioc_bulk[0];
--};
-
--
--#ifdef __KERNEL__
--
--#include <linux/list.h>
--
--struct libcfs_ioctl_handler {
-- struct list_head item;
-- int (*handle_ioctl)(struct portal_ioctl_data *data,
-- unsigned int cmd, unsigned long args);
--};
--
--#define DECLARE_IOCTL_HANDLER(ident, func) \
-- struct libcfs_ioctl_handler ident = { \
-- .item = LIST_HEAD_INIT(ident.item), \
-- .handle_ioctl = func \
-- }
--
--int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
--int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
--
--#endif
--
--#define _LIBCFS_H
--
--#endif /* _LIBCFS_H */
+++ /dev/null
--#ifndef _LUSTRE_LIST_H
--#define _LUSTRE_LIST_H
--
--#ifdef __KERNEL__
--#include <linux/list.h>
--#else
--/*
-- * Simple doubly linked list implementation.
-- *
-- * Some of the internal functions ("__xxx") are useful when
-- * manipulating whole lists rather than single entries, as
-- * sometimes we already know the next/prev entries and we can
-- * generate better code by using them directly rather than
-- * using the generic single-entry routines.
-- */
--
--#define prefetch(a) ((void)a)
--
--struct list_head {
-- struct list_head *next, *prev;
--};
--
--typedef struct list_head list_t;
--
--#define LIST_HEAD_INIT(name) { &(name), &(name) }
--
--#define LIST_HEAD(name) \
-- struct list_head name = LIST_HEAD_INIT(name)
--
--#define INIT_LIST_HEAD(ptr) do { \
-- (ptr)->next = (ptr); (ptr)->prev = (ptr); \
--} while (0)
--
--/*
-- * Insert a new entry between two known consecutive entries.
-- *
-- * This is only for internal list manipulation where we know
-- * the prev/next entries already!
-- */
--static inline void __list_add(struct list_head * new,
-- struct list_head * prev,
-- struct list_head * next)
--{
-- next->prev = new;
-- new->next = next;
-- new->prev = prev;
-- prev->next = new;
--}
--
--/**
-- * list_add - add a new entry
-- * @new: new entry to be added
-- * @head: list head to add it after
-- *
-- * Insert a new entry after the specified head.
-- * This is good for implementing stacks.
-- */
--static inline void list_add(struct list_head *new, struct list_head *head)
--{
-- __list_add(new, head, head->next);
--}
--
--/**
-- * list_add_tail - add a new entry
-- * @new: new entry to be added
-- * @head: list head to add it before
-- *
-- * Insert a new entry before the specified head.
-- * This is useful for implementing queues.
-- */
--static inline void list_add_tail(struct list_head *new, struct list_head *head)
--{
-- __list_add(new, head->prev, head);
--}
--
--/*
-- * Delete a list entry by making the prev/next entries
-- * point to each other.
-- *
-- * This is only for internal list manipulation where we know
-- * the prev/next entries already!
-- */
--static inline void __list_del(struct list_head * prev, struct list_head * next)
--{
-- next->prev = prev;
-- prev->next = next;
--}
--
--/**
-- * list_del - deletes entry from list.
-- * @entry: the element to delete from the list.
-- * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
-- */
--static inline void list_del(struct list_head *entry)
--{
-- __list_del(entry->prev, entry->next);
--}
--
--/**
-- * list_del_init - deletes entry from list and reinitialize it.
-- * @entry: the element to delete from the list.
-- */
--static inline void list_del_init(struct list_head *entry)
--{
-- __list_del(entry->prev, entry->next);
-- INIT_LIST_HEAD(entry);
--}
--
--/**
-- * list_move - delete from one list and add as another's head
-- * @list: the entry to move
-- * @head: the head that will precede our entry
-- */
--static inline void list_move(struct list_head *list, struct list_head *head)
--{
-- __list_del(list->prev, list->next);
-- list_add(list, head);
--}
--
--/**
-- * list_move_tail - delete from one list and add as another's tail
-- * @list: the entry to move
-- * @head: the head that will follow our entry
-- */
--static inline void list_move_tail(struct list_head *list,
-- struct list_head *head)
--{
-- __list_del(list->prev, list->next);
-- list_add_tail(list, head);
--}
--
--/**
-- * list_empty - tests whether a list is empty
-- * @head: the list to test.
-- */
--static inline int list_empty(struct list_head *head)
--{
-- return head->next == head;
--}
--
--static inline void __list_splice(struct list_head *list,
-- struct list_head *head)
--{
-- struct list_head *first = list->next;
-- struct list_head *last = list->prev;
-- struct list_head *at = head->next;
--
-- first->prev = head;
-- head->next = first;
--
-- last->next = at;
-- at->prev = last;
--}
--
--/**
-- * list_splice - join two lists
-- * @list: the new list to add.
-- * @head: the place to add it in the first list.
-- */
--static inline void list_splice(struct list_head *list, struct list_head *head)
--{
-- if (!list_empty(list))
-- __list_splice(list, head);
--}
--
--/**
-- * list_splice_init - join two lists and reinitialise the emptied list.
-- * @list: the new list to add.
-- * @head: the place to add it in the first list.
-- *
-- * The list at @list is reinitialised
-- */
--static inline void list_splice_init(struct list_head *list,
-- struct list_head *head)
--{
-- if (!list_empty(list)) {
-- __list_splice(list, head);
-- INIT_LIST_HEAD(list);
-- }
--}
--
--/**
-- * list_entry - get the struct for this entry
-- * @ptr: the &struct list_head pointer.
-- * @type: the type of the struct this is embedded in.
-- * @member: the name of the list_struct within the struct.
-- */
--#define list_entry(ptr, type, member) \
-- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
--
--/**
-- * list_for_each - iterate over a list
-- * @pos: the &struct list_head to use as a loop counter.
-- * @head: the head for your list.
-- */
--#define list_for_each(pos, head) \
-- for (pos = (head)->next, prefetch(pos->next); pos != (head); \
-- pos = pos->next, prefetch(pos->next))
--
--/**
-- * list_for_each_prev - iterate over a list in reverse order
-- * @pos: the &struct list_head to use as a loop counter.
-- * @head: the head for your list.
-- */
--#define list_for_each_prev(pos, head) \
-- for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
-- pos = pos->prev, prefetch(pos->prev))
--
--/**
-- * list_for_each_safe - iterate over a list safe against removal of list entry
-- * @pos: the &struct list_head to use as a loop counter.
-- * @n: another &struct list_head to use as temporary storage
-- * @head: the head for your list.
-- */
--#define list_for_each_safe(pos, n, head) \
-- for (pos = (head)->next, n = pos->next; pos != (head); \
-- pos = n, n = pos->next)
--
--/**
-- * list_for_each_entry - iterate over list of given type
-- * @pos: the type * to use as a loop counter.
-- * @head: the head for your list.
-- * @member: the name of the list_struct within the struct.
-- */
--#define list_for_each_entry(pos, head, member) \
-- for (pos = list_entry((head)->next, typeof(*pos), member), \
-- prefetch(pos->member.next); \
-- &pos->member != (head); \
-- pos = list_entry(pos->member.next, typeof(*pos), member), \
-- prefetch(pos->member.next))
--
--/**
-- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
-- * @pos: the type * to use as a loop counter.
-- * @n: another type * to use as temporary storage
-- * @head: the head for your list.
-- * @member: the name of the list_struct within the struct.
-- */
--#define list_for_each_entry_safe(pos, n, head, member) \
-- for (pos = list_entry((head)->next, typeof(*pos), member), \
-- n = list_entry(pos->member.next, typeof(*pos), member); \
-- &pos->member != (head); \
-- pos = n, n = list_entry(n->member.next, typeof(*n), member))
--
--#endif /* if !__KERNEL__*/
--#endif /* if !_LUSTRE_LIST_H */
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#ifndef _PORTALS_COMPAT_H
--#define _PORTALS_COMPAT_H
--
--// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
--#if SPINLOCK_DEBUG
--# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
--# define SIGNAL_MASK_ASSERT() \
-- LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
--# else
--# define SIGNAL_MASK_ASSERT() \
-- LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
--# endif
--#else
--# define SIGNAL_MASK_ASSERT()
--#endif
--// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
--
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
--
--# define SIGNAL_MASK_LOCK(task, flags) \
-- spin_lock_irqsave(&task->sighand->siglock, flags)
--# define SIGNAL_MASK_UNLOCK(task, flags) \
-- spin_unlock_irqrestore(&task->sighand->siglock, flags)
--# define USERMODEHELPER(path, argv, envp) \
-- call_usermodehelper(path, argv, envp, 1)
--# define RECALC_SIGPENDING recalc_sigpending()
--# define CURRENT_SECONDS get_seconds()
- # define smp_num_cpus NR_CPUS
--
--#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */
--
--# define SIGNAL_MASK_LOCK(task, flags) \
-- spin_lock_irqsave(&task->sighand->siglock, flags)
--# define SIGNAL_MASK_UNLOCK(task, flags) \
-- spin_unlock_irqrestore(&task->sighand->siglock, flags)
--# define USERMODEHELPER(path, argv, envp) \
-- call_usermodehelper(path, argv, envp)
--# define RECALC_SIGPENDING recalc_sigpending()
--# define CURRENT_SECONDS CURRENT_TIME
--
--# define kernel_text_address(addr) is_kernel_text_address(addr)
--extern int is_kernel_text_address(unsigned long addr);
--
--#else /* 2.4.x */
--
--# define SIGNAL_MASK_LOCK(task, flags) \
-- spin_lock_irqsave(&task->sigmask_lock, flags)
--# define SIGNAL_MASK_UNLOCK(task, flags) \
-- spin_unlock_irqrestore(&task->sigmask_lock, flags)
--# define USERMODEHELPER(path, argv, envp) \
-- call_usermodehelper(path, argv, envp)
--# define RECALC_SIGPENDING recalc_sigpending(current)
--# define CURRENT_SECONDS CURRENT_TIME
--
--# define kernel_text_address(addr) is_kernel_text_address(addr)
--extern int is_kernel_text_address(unsigned long addr);
--
--#endif
--
--#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
--# define THREAD_NAME(comm, len, fmt, a...) \
-- snprintf(comm, len, fmt "|%d", ## a, current->thread.extern_pid)
--#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--# define THREAD_NAME(comm, len, fmt, a...) \
-- snprintf(comm, len,fmt"|%d", ## a,current->thread.mode.tt.extern_pid)
--#else
--# define THREAD_NAME(comm, len, fmt, a...) \
-- snprintf(comm, len, fmt, ## a)
--#endif
--
--#ifdef HAVE_PAGE_LIST
--/* 2.4 alloc_page users can use page->list */
--#define PAGE_LIST_ENTRY list
--#define PAGE_LIST(page) ((page)->list)
--#else
--/* 2.6 alloc_page users can use page->lru */
--#define PAGE_LIST_ENTRY lru
--#define PAGE_LIST(page) ((page)->lru)
--#endif
--
--#ifndef HAVE_CPU_ONLINE
--#define cpu_online(cpu) (test_bit(cpu_online_map, &(cpu)))
--#endif
--#ifndef HAVE_CPUMASK_T
--#define cpu_set(cpu, map) (set_bit(cpu, &(map)))
--typedef unsigned long cpumask_t;
--#endif
--
--#endif /* _PORTALS_COMPAT_H */
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- * Basic library routines.
-- *
-- */
--
--#ifndef _PORTALS_LIB_H
--#define _PORTALS_LIB_H
--
--#ifndef __KERNEL__
--# include <string.h>
--#else
--# include <asm/types.h>
--#endif
--
--#undef MIN
--#define MIN(a,b) (((a)<(b)) ? (a): (b))
--#undef MAX
--#define MAX(a,b) (((a)>(b)) ? (a): (b))
--#define MKSTR(ptr) ((ptr))? (ptr) : ""
--
--static inline int size_round (int val)
--{
-- return (val + 7) & (~0x7);
--}
--
--static inline int size_round16(int val)
--{
-- return (val + 0xf) & (~0xf);
--}
--
--static inline int size_round32(int val)
--{
-- return (val + 0x1f) & (~0x1f);
--}
--
--static inline int size_round0(int val)
--{
-- if (!val)
-- return 0;
-- return (val + 1 + 7) & (~0x7);
--}
--
--static inline size_t round_strlen(char *fset)
--{
-- return size_round(strlen(fset) + 1);
- }
-
- #ifdef __KERNEL__
- static inline char *strdup(const char *str)
- {
- int len = strlen(str) + 1;
- char *tmp = kmalloc(len, GFP_KERNEL);
- if (tmp)
- memcpy(tmp, str, len);
-
- return tmp;
--}
- #endif
-
- #ifdef __KERNEL__
- # define NTOH__u32(var) le32_to_cpu(var)
- # define NTOH__u64(var) le64_to_cpu(var)
- # define HTON__u32(var) cpu_to_le32(var)
- # define HTON__u64(var) cpu_to_le64(var)
- #else
- # define expansion_u64(var) \
- ({ __u64 ret; \
- switch (sizeof(var)) { \
- case 8: (ret) = (var); break; \
- case 4: (ret) = (__u32)(var); break; \
- case 2: (ret) = (__u16)(var); break; \
- case 1: (ret) = (__u8)(var); break; \
- }; \
- (ret); \
- })
- # define NTOH__u32(var) (var)
- # define NTOH__u64(var) (expansion_u64(var))
- # define HTON__u32(var) (var)
- # define HTON__u64(var) (expansion_u64(var))
- #endif
-
- /*
- * copy sizeof(type) bytes from pointer to var and move ptr forward.
- * return EFAULT if pointer goes beyond end
- */
- #define UNLOGV(var,type,ptr,end) \
- do { \
- var = *(type *)ptr; \
- ptr += sizeof(type); \
- if (ptr > end ) \
- return -EFAULT; \
- } while (0)
-
- /* the following two macros convert to little endian */
- /* type MUST be __u32 or __u64 */
- #define LUNLOGV(var,type,ptr,end) \
- do { \
- var = NTOH##type(*(type *)ptr); \
- ptr += sizeof(type); \
- if (ptr > end ) \
- return -EFAULT; \
- } while (0)
-
- /* now log values */
- #define LOGV(var,type,ptr) \
- do { \
- *((type *)ptr) = var; \
- ptr += sizeof(type); \
- } while (0)
-
- /* and in network order */
- #define LLOGV(var,type,ptr) \
- do { \
- *((type *)ptr) = HTON##type(var); \
- ptr += sizeof(type); \
- } while (0)
-
-
- /*
- * set var to point at (type *)ptr, move ptr forward with sizeof(type)
- * return from function with EFAULT if ptr goes beyond end
- */
- #define UNLOGP(var,type,ptr,end) \
- do { \
- var = (type *)ptr; \
- ptr += sizeof(type); \
- if (ptr > end ) \
- return -EFAULT; \
- } while (0)
-
- #define LOGP(var,type,ptr) \
- do { \
- memcpy(ptr, var, sizeof(type)); \
- ptr += sizeof(type); \
- } while (0)
-
- /*
- * set var to point at (char *)ptr, move ptr forward by size_round(len);
- * return from function with EFAULT if ptr goes beyond end
- */
- #define UNLOGL(var,type,len,ptr,end) \
- do { \
- var = (type *)ptr; \
- ptr += size_round(len * sizeof(type)); \
- if (ptr > end ) \
- return -EFAULT; \
- } while (0)
-
- #define UNLOGL0(var,type,len,ptr,end) \
- do { \
- UNLOGL(var,type,len,ptr,end); \
- if ( *((char *)ptr - size_round(len) + len - 1) != '\0') \
- return -EFAULT; \
- } while (0)
--
--#define LOGL(var,len,ptr) \
--do { \
-- if (var) \
-- memcpy((char *)ptr, (const char *)var, len); \
-- ptr += size_round(len); \
--} while (0)
--
--#define LOGU(var,len,ptr) \
--do { \
-- if (var) \
-- memcpy((char *)var, (const char *)ptr, len); \
-- ptr += size_round(len); \
--} while (0)
--
--#define LOGL0(var,len,ptr) \
--do { \
-- if (!len) \
-- break; \
-- memcpy((char *)ptr, (const char *)var, len); \
-- *((char *)(ptr) + len) = 0; \
-- ptr += size_round(len + 1); \
--} while (0)
--
--#endif /* _PORTALS_LIB_H */
+++ /dev/null
--portalsdir=$(includedir)/portals
--
--if UTILS
--portals_HEADERS = list.h
--endif
--
- EXTRA_DIST = api.h api-support.h arg-blocks.h defines.h errno.h \
- internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h \
- list.h lltrace.h myrnal.h nal.h nalids.h p30.h ppid.h ptlctl.h \
-EXTRA_DIST = api.h api-support.h build_check.h errno.h \
- internal.h lib-p30.h lib-types.h list.h \
- lltrace.h myrnal.h nal.h nalids.h p30.h ptlctl.h \
-- socknal.h stringtab.h types.h
+++ /dev/null
- # define DEBUG_SUBSYSTEM S_PORTALS
- # define PORTAL_DEBUG
-
-#include "build_check.h"
--
--#ifndef __KERNEL__
--# include <stdio.h>
--# include <stdlib.h>
--# include <unistd.h>
--# include <time.h>
--
--/* Lots of POSIX dependencies to support PtlEQWait_timeout */
--# include <signal.h>
--# include <setjmp.h>
--# include <time.h>
--#endif
--
--#include <portals/types.h>
--#include <linux/kp30.h>
--#include <portals/p30.h>
--
--#include <portals/internal.h>
--#include <portals/nal.h>
- #include <portals/arg-blocks.h>
--
- /* Hack for 2.4.18 macro name collision */
- #ifdef yield
- #undef yield
- #endif
+++ /dev/null
--#ifndef P30_API_H
--#define P30_API_H
-
-#include "build_check.h"
--
--#include <portals/types.h>
--
- #ifndef PTL_NO_WRAP
- int PtlInit(void);
- int PtlInitialized(void);
-int PtlInit(int *);
--void PtlFini(void);
--
- int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in,
- ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid,
- ptl_handle_ni_t * interface_out);
-int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
- ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits,
- ptl_handle_ni_t *interface_out);
--
--int PtlNIInitialized(ptl_interface_t);
--
--int PtlNIFini(ptl_handle_ni_t interface_in);
-
- #endif
--
--int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
-
-int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid);
--
--
--/*
-- * Network interfaces
-- */
-
- #ifndef PTL_NO_WRAP
- int PtlNIBarrier(ptl_handle_ni_t interface_in);
- #endif
--
--int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
-- ptl_sr_value_t * status_out);
--
--int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
-- unsigned long *distance_out);
--
- #ifndef PTL_NO_WRAP
--int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
- #endif
-
--
- /*
- * PtlNIDebug:
- *
- * This is not an official Portals 3 API call. It is provided
- * by the reference implementation to allow the maintainers an
- * easy way to turn on and off debugging information in the
- * library. Do not use it in code that is not intended for use
- * with any version other than the portable reference library.
- */
- unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in);
--
--/*
-- * PtlNIFailNid
-- *
-- * Not an official Portals 3 API call. It provides a way of simulating
-- * communications failures to all (nid == PTL_NID_ANY), or specific peers
-- * (via multiple calls), either until further notice (threshold == -1), or
-- * for a specific number of messages. Passing a threshold of zero, "heals"
-- * the given peer.
-- */
--int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
--
-/*
- * PtlSnprintHandle:
- *
- * This is not an official Portals 3 API call. It is provided
- * so that an application can print an opaque handle.
- */
-void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle);
--
--/*
-- * Match entries
-- */
--
--int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
-- ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
-- ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
-- ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out);
--
--int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
-- ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
-- ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
-- ptl_handle_me_t * handle_out);
--
--int PtlMEUnlink(ptl_handle_me_t current_in);
--
--int PtlMEUnlinkList(ptl_handle_me_t current_in);
-
- int PtlTblDump(ptl_handle_ni_t ni, int index_in);
- int PtlMEDump(ptl_handle_me_t current_in);
--
--
--
--/*
-- * Memory descriptors
-- */
--
- #ifndef PTL_NO_WRAP
--int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
-- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
--
--int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_handle_md_t * handle_out);
- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
--
--int PtlMDUnlink(ptl_handle_md_t md_in);
--
--int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
-- ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
--
- #endif
--
--/* These should not be called by users */
--int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
-- ptl_md_t * new_inout, ptl_handle_eq_t testq_in,
-- ptl_seq_t sequence_in);
--
--
--
--
--/*
-- * Event queues
-- */
- #ifndef PTL_NO_WRAP
-
- /* These should be called by users */
--int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
- int (*callback) (ptl_event_t * event),
- ptl_handle_eq_t * handle_out);
- ptl_eq_handler_t handler,
- ptl_handle_eq_t *handle_out);
--int PtlEQFree(ptl_handle_eq_t eventq_in);
-
- int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out);
--
--int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
--
--
--int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
--
- int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out,
- int timeout);
- #endif
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
- ptl_event_t *event_out, int *which_out);
--
--/*
-- * Access Control Table
-- */
--int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
-- ptl_process_id_t match_id_in, ptl_pt_index_t portal_in);
--
--
--/*
-- * Data movement
-- */
--
--int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
-- ptl_process_id_t target_in, ptl_pt_index_t portal_in,
-- ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
-- ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in);
--
--int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
-- ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
-- ptl_match_bits_t match_bits_in, ptl_size_t offset_in);
--
--
--
--#endif
+++ /dev/null
--#ifndef _P30_ERRNO_H_
--#define _P30_ERRNO_H_
--
-#include "build_check.h"
--/*
-- * include/portals/errno.h
-- *
-- * Shared error number lists
-- */
--
--/* If you change these, you must update the string table in api-errno.c */
--typedef enum {
- PTL_OK = 0,
- PTL_SEGV = 1,
- PTL_OK = 0,
- PTL_SEGV = 1,
--
- PTL_NOSPACE = 2,
- PTL_INUSE = 3,
- PTL_VAL_FAILED = 4,
- PTL_NO_SPACE = 2,
- PTL_ME_IN_USE = 3,
- PTL_VAL_FAILED = 4,
--
- PTL_NAL_FAILED = 5,
- PTL_NOINIT = 6,
- PTL_INIT_DUP = 7,
- PTL_INIT_INV = 8,
- PTL_AC_INV_INDEX = 9,
- PTL_NAL_FAILED = 5,
- PTL_NO_INIT = 6,
- PTL_IFACE_DUP = 7,
- PTL_IFACE_INVALID = 8,
--
- PTL_INV_ASIZE = 10,
- PTL_INV_HANDLE = 11,
- PTL_INV_MD = 12,
- PTL_INV_ME = 13,
- PTL_INV_NI = 14,
- PTL_HANDLE_INVALID = 9,
- PTL_MD_INVALID = 10,
- PTL_ME_INVALID = 11,
--/* If you change these, you must update the string table in api-errno.c */
- PTL_ILL_MD = 15,
- PTL_INV_PROC = 16,
- PTL_INV_PSIZE = 17,
- PTL_INV_PTINDEX = 18,
- PTL_INV_REG = 19,
- PTL_PROCESS_INVALID = 12,
- PTL_PT_INDEX_INVALID = 13,
--
- PTL_INV_SR_INDX = 20,
- PTL_ML_TOOLONG = 21,
- PTL_ADDR_UNKNOWN = 22,
- PTL_INV_EQ = 23,
- PTL_EQ_DROPPED = 24,
- PTL_SR_INDEX_INVALID = 14,
- PTL_EQ_INVALID = 15,
- PTL_EQ_DROPPED = 16,
--
- PTL_EQ_EMPTY = 25,
- PTL_NOUPDATE = 26,
- PTL_FAIL = 27,
- PTL_NOT_IMPLEMENTED = 28,
- PTL_NO_ACK = 29,
- PTL_EQ_EMPTY = 17,
- PTL_MD_NO_UPDATE = 18,
- PTL_FAIL = 19,
--
- PTL_IOV_TOO_MANY = 30,
- PTL_IOV_TOO_SMALL = 31,
- PTL_IOV_INVALID = 20,
--
- PTL_EQ_INUSE = 32,
- PTL_EQ_IN_USE = 21,
--
- PTL_MAX_ERRNO = 32
- PTL_NI_INVALID = 22,
- PTL_MD_ILLEGAL = 23,
-
- PTL_MAX_ERRNO = 24
--} ptl_err_t;
--/* If you change these, you must update the string table in api-errno.c */
--
--extern const char *ptl_err_str[];
--
--#endif
+++ /dev/null
--#ifndef _P30_INTERNAL_H_
--#define _P30_INTERNAL_H_
--
-#include "build_check.h"
--/*
-- * p30/internal.h
-- *
-- * Internals for the API level library that are not needed
-- * by the user application
-- */
--
--#include <portals/p30.h>
-
- extern int ptl_init; /* Has the library be initialized */
-
- extern int ptl_ni_init(void);
- extern int ptl_me_init(void);
- extern int ptl_md_init(void);
- extern int ptl_eq_init(void);
-
- extern int ptl_me_ni_init(nal_t * nal);
- extern int ptl_md_ni_init(nal_t * nal);
- extern int ptl_eq_ni_init(nal_t * nal);
-
- extern void ptl_ni_fini(void);
- extern void ptl_me_fini(void);
- extern void ptl_md_fini(void);
- extern void ptl_eq_fini(void);
-
- extern void ptl_me_ni_fini(nal_t * nal);
- extern void ptl_md_ni_fini(nal_t * nal);
- extern void ptl_eq_ni_fini(nal_t * nal);
--
- static inline ptl_eq_t *
- ptl_handle2usereq (ptl_handle_eq_t *handle)
- {
- /* EQ handles are a little wierd. On the "user" side, the cookie
- * is just a pointer to a queue of events in shared memory. It's
- * cb_eq_handle is the "real" handle which we pass when we
- * call do_forward(). */
- return (ptl_eq_t *)((unsigned long)handle->cookie);
- }
-extern int ptl_init; /* Has the library been initialized */
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib-p30.h
-- *
-- * Top level include for library side routines
-- */
--
--#ifndef _LIB_P30_H_
--#define _LIB_P30_H_
-
-#include "build_check.h"
--
--#ifdef __KERNEL__
--# include <asm/page.h>
--# include <linux/string.h>
--#else
--# include <portals/list.h>
--# include <string.h>
-# include <pthread.h>
--#endif
--#include <portals/types.h>
--#include <linux/kp30.h>
--#include <portals/p30.h>
-#include <portals/nal.h>
--#include <portals/lib-types.h>
- #include <portals/lib-nal.h>
- #include <portals/lib-dispatch.h>
--
--static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
--{
-- return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie &&
-- wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
--}
--
- #define state_lock(nal,flagsp) \
- do { \
- CDEBUG(D_PORTALS, "taking state lock\n"); \
- nal->cb_cli(nal, flagsp); \
- } while (0)
-#ifdef __KERNEL__
-#define LIB_LOCK(nal,flags) \
- spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
-#define LIB_UNLOCK(nal,flags) \
- spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
-#else
-#define LIB_LOCK(nal,flags) \
- (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
-#define LIB_UNLOCK(nal,flags) \
- pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
-#endif
--
- #define state_unlock(nal,flagsp) \
- { \
- CDEBUG(D_PORTALS, "releasing state lock\n"); \
- nal->cb_sti(nal, flagsp); \
- }
--
--#ifdef PTL_USE_LIB_FREELIST
--
--#define MAX_MES 2048
--#define MAX_MDS 2048
--#define MAX_MSGS 2048 /* Outstanding messages */
--#define MAX_EQS 512
--
- extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
- extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
-extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
--
--static inline void *
--lib_freelist_alloc (lib_freelist_t *fl)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_freeobj_t *o;
--
-- if (list_empty (&fl->fl_list))
-- return (NULL);
--
-- o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list);
-- list_del (&o->fo_list);
-- return ((void *)&o->fo_contents);
--}
--
--static inline void
--lib_freelist_free (lib_freelist_t *fl, void *obj)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
--
-- list_add (&o->fo_list, &fl->fl_list);
--}
--
--
--static inline lib_eq_t *
- lib_eq_alloc (nal_cb_t *nal)
-lib_eq_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_eq_t *eq;
--
- state_lock (nal, &flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
- LIB_UNLOCK (nal, flags);
--
-- return (eq);
--}
--
--static inline void
- lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
-lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_eqs, eq);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
--}
--
--static inline lib_md_t *
- lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
-lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_md_t *md;
--
- state_lock (nal, &flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
- LIB_UNLOCK (nal, flags);
--
-- return (md);
--}
--
--static inline void
- lib_md_free (nal_cb_t *nal, lib_md_t *md)
-lib_md_free (lib_nal_t *nal, lib_md_t *md)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mds, md);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
--}
--
--static inline lib_me_t *
- lib_me_alloc (nal_cb_t *nal)
-lib_me_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_me_t *me;
--
- state_lock (nal, &flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
- LIB_UNLOCK (nal, flags);
--
-- return (me);
--}
--
--static inline void
- lib_me_free (nal_cb_t *nal, lib_me_t *me)
-lib_me_free (lib_nal_t *nal, lib_me_t *me)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mes, me);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
--}
--
--static inline lib_msg_t *
- lib_msg_alloc (nal_cb_t *nal)
-lib_msg_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_msg_t *msg;
--
- state_lock (nal, &flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
- LIB_UNLOCK (nal, flags);
--
-- if (msg != NULL) {
-- /* NULL pointers, clear flags etc */
-- memset (msg, 0, sizeof (*msg));
-- msg->ack_wmd = PTL_WIRE_HANDLE_NONE;
-- }
-- return(msg);
--}
--
--static inline void
- lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
-lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_msgs, msg);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
--}
--
--#else
--
--static inline lib_eq_t *
- lib_eq_alloc (nal_cb_t *nal)
-lib_eq_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- lib_eq_t *eq;
--
-- PORTAL_ALLOC(eq, sizeof(*eq));
-- return (eq);
--}
--
--static inline void
- lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
-lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- PORTAL_FREE(eq, sizeof(*eq));
--}
--
--static inline lib_md_t *
- lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
-lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- lib_md_t *md;
-- int size;
-- int niov;
--
-- if ((umd->options & PTL_MD_KIOV) != 0) {
- niov = umd->niov;
- niov = umd->length;
-- size = offsetof(lib_md_t, md_iov.kiov[niov]);
-- } else {
- niov = ((umd->options & PTL_MD_IOV) != 0) ?
- umd->niov : 1;
- niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
- umd->length : 1;
-- size = offsetof(lib_md_t, md_iov.iov[niov]);
-- }
--
-- PORTAL_ALLOC(md, size);
--
-- if (md != NULL) {
-- /* Set here in case of early free */
-- md->options = umd->options;
-- md->md_niov = niov;
-- }
--
-- return (md);
--}
--
--static inline void
- lib_md_free (nal_cb_t *nal, lib_md_t *md)
-lib_md_free (lib_nal_t *nal, lib_md_t *md)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- int size;
--
-- if ((md->options & PTL_MD_KIOV) != 0)
-- size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]);
-- else
-- size = offsetof(lib_md_t, md_iov.iov[md->md_niov]);
--
-- PORTAL_FREE(md, size);
--}
--
--static inline lib_me_t *
- lib_me_alloc (nal_cb_t *nal)
-lib_me_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- lib_me_t *me;
--
-- PORTAL_ALLOC(me, sizeof(*me));
-- return (me);
--}
--
--static inline void
- lib_me_free(nal_cb_t *nal, lib_me_t *me)
-lib_me_free(lib_nal_t *nal, lib_me_t *me)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- PORTAL_FREE(me, sizeof(*me));
--}
--
--static inline lib_msg_t *
- lib_msg_alloc(nal_cb_t *nal)
-lib_msg_alloc(lib_nal_t *nal)
--{
- /* NEVER called with statelock held; may be in interrupt... */
- /* NEVER called with liblock held; may be in interrupt... */
-- lib_msg_t *msg;
--
-- if (in_interrupt())
-- PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
-- else
-- PORTAL_ALLOC(msg, sizeof(*msg));
--
-- if (msg != NULL) {
-- /* NULL pointers, clear flags etc */
-- memset (msg, 0, sizeof (*msg));
-- msg->ack_wmd = PTL_WIRE_HANDLE_NONE;
-- }
-- return (msg);
--}
--
--static inline void
- lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
-lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- PORTAL_FREE(msg, sizeof(*msg));
--}
--#endif
--
- extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
- extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
- extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
-extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
--
--static inline void
- ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
--{
- handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-- handle->cookie = eq->eq_lh.lh_cookie;
--}
--
--static inline lib_eq_t *
- ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
-ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-- PTL_COOKIE_TYPE_EQ);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_eq_t, eq_lh));
--}
--
--static inline void
- ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
-ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
--{
- handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-- handle->cookie = md->md_lh.lh_cookie;
--}
--
--static inline lib_md_t *
- ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
-ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-- PTL_COOKIE_TYPE_MD);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_md_t, md_lh));
--}
--
--static inline lib_md_t *
- ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
-ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh;
--
- if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
- if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
-- return (NULL);
--
-- lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
-- PTL_COOKIE_TYPE_MD);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_md_t, md_lh));
--}
--
--static inline void
- ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
-ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
--{
- handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-- handle->cookie = me->me_lh.lh_cookie;
--}
--
--static inline lib_me_t *
- ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
-ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-- PTL_COOKIE_TYPE_ME);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_me_t, me_lh));
--}
--
- extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize,
- ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size);
- extern int lib_fini(nal_cb_t * cb);
- extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
- void *arg_block, void *ret_block);
- extern char *dispatch_name(int index);
-extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
- ptl_process_id_t pid,
- ptl_ni_limits_t *desired_limits,
- ptl_ni_limits_t *actual_limits);
-extern int lib_fini(lib_nal_t *libnal);
--
--/*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it. The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed. An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
- * When the NAL detects an incoming message header, it should call
- * lib_parse() decode it. If the message header is garbage, lib_parse()
- * returns immediately with failure, otherwise the NAL callbacks will be
- * called to receive the message body. They are handed the private cookie
- * as a way for the NAL to maintain state about which transaction is being
- * processed. An extra parameter, lib_msg contains the lib-level message
- * state for passing to lib_finalize() when the message body has been
- * received.
-- */
- extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
-extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
-- lib_eq_t *eq, ptl_event_t *ev);
- extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
- ptl_err_t status);
- extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
- extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
- lib_md_t *getmd);
- extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
-extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg,
- ptl_ni_fail_t ni_fail_type);
-extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid,
- lib_msg_t *get_msg);
-extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
--
--
--extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
--extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov,
-- ptl_size_t offset, ptl_size_t len);
--extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset,
-- char *src, ptl_size_t len);
--extern int lib_extract_iov (int dst_niov, struct iovec *dst,
-- int src_niov, struct iovec *src,
-- ptl_size_t offset, ptl_size_t len);
--
--extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov);
--extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
-- ptl_size_t offset, ptl_size_t len);
--extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-- char *src, ptl_size_t len);
--extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
-- int src_niov, ptl_kiov_t *src,
-- ptl_size_t offset, ptl_size_t len);
--
--extern void lib_assert_wire_constants (void);
--
- extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-- ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
- extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
-extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
-- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-- lib_md_t *md, ptl_size_t offset, ptl_size_t len);
--
- extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
- ptl_md_t * md_out);
- extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
- extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
-extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
- ptl_sr_value_t *status);
-extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid,
- unsigned long *dist);
-
-extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
- ptl_eq_handler_t callback,
- ptl_handle_eq_t *handle);
-extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
-extern int lib_api_eq_poll (nal_t *nal,
- ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
- ptl_event_t *event, int *which);
-
-extern int lib_api_me_attach(nal_t *nal,
- ptl_pt_index_t portal,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
- ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle);
-extern int lib_api_me_insert(nal_t *nal,
- ptl_handle_me_t *current_meh,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
- ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle);
-extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
-extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
-
-extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
-
-extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
-extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
-extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
- ptl_md_t *umd, ptl_unlink_t unlink,
- ptl_handle_md_t *handle);
-extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
- ptl_handle_md_t *handle);
-extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
-extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
- ptl_md_t *oldumd, ptl_md_t *newumd,
- ptl_handle_eq_t *testqh);
-
-extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh,
- ptl_process_id_t *id,
- ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits, ptl_size_t offset);
-extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
- ptl_ack_req_t ack, ptl_process_id_t *id,
- ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits,
- ptl_size_t offset, ptl_hdr_data_t hdr_data);
-extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
-
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib-p30.h
-- *
-- * Top level include for library side routines
-- */
--
--#ifndef _LIB_P30_H_
--#define _LIB_P30_H_
-
-#include "build_check.h"
--
--#ifdef __KERNEL__
--# include <asm/page.h>
--# include <linux/string.h>
--#else
--# include <portals/list.h>
--# include <string.h>
-# include <pthread.h>
--#endif
--#include <portals/types.h>
--#include <linux/kp30.h>
--#include <portals/p30.h>
-#include <portals/nal.h>
--#include <portals/lib-types.h>
- #include <portals/lib-nal.h>
- #include <portals/lib-dispatch.h>
--
--static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
--{
-- return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie &&
-- wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
--}
--
- #define state_lock(nal,flagsp) \
- do { \
- CDEBUG(D_PORTALS, "taking state lock\n"); \
- nal->cb_cli(nal, flagsp); \
- } while (0)
-#ifdef __KERNEL__
-#define LIB_LOCK(nal,flags) \
- spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
-#define LIB_UNLOCK(nal,flags) \
- spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
-#else
-#define LIB_LOCK(nal,flags) \
- (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
-#define LIB_UNLOCK(nal,flags) \
- pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
-#endif
--
- #define state_unlock(nal,flagsp) \
- { \
- CDEBUG(D_PORTALS, "releasing state lock\n"); \
- nal->cb_sti(nal, flagsp); \
- }
--
--#ifdef PTL_USE_LIB_FREELIST
--
--#define MAX_MES 2048
--#define MAX_MDS 2048
--#define MAX_MSGS 2048 /* Outstanding messages */
--#define MAX_EQS 512
--
- extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize);
- extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl);
-extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
--
--static inline void *
--lib_freelist_alloc (lib_freelist_t *fl)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_freeobj_t *o;
--
-- if (list_empty (&fl->fl_list))
-- return (NULL);
--
-- o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list);
-- list_del (&o->fo_list);
-- return ((void *)&o->fo_contents);
--}
--
--static inline void
--lib_freelist_free (lib_freelist_t *fl, void *obj)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents);
--
-- list_add (&o->fo_list, &fl->fl_list);
--}
--
--
--static inline lib_eq_t *
- lib_eq_alloc (nal_cb_t *nal)
-lib_eq_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_eq_t *eq;
--
- state_lock (nal, &flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
- LIB_UNLOCK (nal, flags);
--
-- return (eq);
--}
--
--static inline void
- lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
-lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_eqs, eq);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
--}
--
--static inline lib_md_t *
- lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
-lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_md_t *md;
--
- state_lock (nal, &flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
- LIB_UNLOCK (nal, flags);
--
-- return (md);
--}
--
--static inline void
- lib_md_free (nal_cb_t *nal, lib_md_t *md)
-lib_md_free (lib_nal_t *nal, lib_md_t *md)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mds, md);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
--}
--
--static inline lib_me_t *
- lib_me_alloc (nal_cb_t *nal)
-lib_me_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_me_t *me;
--
- state_lock (nal, &flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
- LIB_UNLOCK (nal, flags);
--
-- return (me);
--}
--
--static inline void
- lib_me_free (nal_cb_t *nal, lib_me_t *me)
-lib_me_free (lib_nal_t *nal, lib_me_t *me)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_mes, me);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
--}
--
--static inline lib_msg_t *
- lib_msg_alloc (nal_cb_t *nal)
-lib_msg_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- unsigned long flags;
-- lib_msg_t *msg;
--
- state_lock (nal, &flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs);
- state_unlock (nal, &flags);
- LIB_LOCK (nal, flags);
- msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
- LIB_UNLOCK (nal, flags);
--
-- if (msg != NULL) {
-- /* NULL pointers, clear flags etc */
-- memset (msg, 0, sizeof (*msg));
-- msg->ack_wmd = PTL_WIRE_HANDLE_NONE;
-- }
-- return(msg);
--}
--
--static inline void
- lib_msg_free (nal_cb_t *nal, lib_msg_t *msg)
-lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
--{
- /* ALWAYS called with statelock held */
- lib_freelist_free (&nal->ni.ni_free_msgs, msg);
- /* ALWAYS called with liblock held */
- lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
--}
--
--#else
--
--static inline lib_eq_t *
- lib_eq_alloc (nal_cb_t *nal)
-lib_eq_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- lib_eq_t *eq;
--
-- PORTAL_ALLOC(eq, sizeof(*eq));
-- return (eq);
--}
--
--static inline void
- lib_eq_free (nal_cb_t *nal, lib_eq_t *eq)
-lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- PORTAL_FREE(eq, sizeof(*eq));
--}
--
--static inline lib_md_t *
- lib_md_alloc (nal_cb_t *nal, ptl_md_t *umd)
-lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- lib_md_t *md;
-- int size;
-- int niov;
--
-- if ((umd->options & PTL_MD_KIOV) != 0) {
- niov = umd->niov;
- niov = umd->length;
-- size = offsetof(lib_md_t, md_iov.kiov[niov]);
-- } else {
- niov = ((umd->options & PTL_MD_IOV) != 0) ?
- umd->niov : 1;
- niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
- umd->length : 1;
-- size = offsetof(lib_md_t, md_iov.iov[niov]);
-- }
--
-- PORTAL_ALLOC(md, size);
--
-- if (md != NULL) {
-- /* Set here in case of early free */
-- md->options = umd->options;
-- md->md_niov = niov;
-- }
--
-- return (md);
--}
--
--static inline void
- lib_md_free (nal_cb_t *nal, lib_md_t *md)
-lib_md_free (lib_nal_t *nal, lib_md_t *md)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- int size;
--
-- if ((md->options & PTL_MD_KIOV) != 0)
-- size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]);
-- else
-- size = offsetof(lib_md_t, md_iov.iov[md->md_niov]);
--
-- PORTAL_FREE(md, size);
--}
--
--static inline lib_me_t *
- lib_me_alloc (nal_cb_t *nal)
-lib_me_alloc (lib_nal_t *nal)
--{
- /* NEVER called with statelock held */
- /* NEVER called with liblock held */
-- lib_me_t *me;
--
-- PORTAL_ALLOC(me, sizeof(*me));
-- return (me);
--}
--
--static inline void
- lib_me_free(nal_cb_t *nal, lib_me_t *me)
-lib_me_free(lib_nal_t *nal, lib_me_t *me)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- PORTAL_FREE(me, sizeof(*me));
--}
--
--static inline lib_msg_t *
- lib_msg_alloc(nal_cb_t *nal)
-lib_msg_alloc(lib_nal_t *nal)
--{
- /* NEVER called with statelock held; may be in interrupt... */
- /* NEVER called with liblock held; may be in interrupt... */
-- lib_msg_t *msg;
--
-- if (in_interrupt())
-- PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
-- else
-- PORTAL_ALLOC(msg, sizeof(*msg));
--
-- if (msg != NULL) {
-- /* NULL pointers, clear flags etc */
-- memset (msg, 0, sizeof (*msg));
-- msg->ack_wmd = PTL_WIRE_HANDLE_NONE;
-- }
-- return (msg);
--}
--
--static inline void
- lib_msg_free(nal_cb_t *nal, lib_msg_t *msg)
-lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- PORTAL_FREE(msg, sizeof(*msg));
--}
--#endif
--
- extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type);
- extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type);
- extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh);
-extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
--
--static inline void
- ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq)
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
--{
- handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-- handle->cookie = eq->eq_lh.lh_cookie;
--}
--
--static inline lib_eq_t *
- ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal)
-ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-- PTL_COOKIE_TYPE_EQ);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_eq_t, eq_lh));
--}
--
--static inline void
- ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md)
-ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
--{
- handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-- handle->cookie = md->md_lh.lh_cookie;
--}
--
--static inline lib_md_t *
- ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal)
-ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-- PTL_COOKIE_TYPE_MD);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_md_t, md_lh));
--}
--
--static inline lib_md_t *
- ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal)
-ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh;
--
- if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie)
- if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
-- return (NULL);
--
-- lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
-- PTL_COOKIE_TYPE_MD);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_md_t, md_lh));
--}
--
--static inline void
- ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me)
-ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
--{
- handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-- handle->cookie = me->me_lh.lh_cookie;
--}
--
--static inline lib_me_t *
- ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal)
-ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
--{
- /* ALWAYS called with statelock held */
- /* ALWAYS called with liblock held */
-- lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-- PTL_COOKIE_TYPE_ME);
-- if (lh == NULL)
-- return (NULL);
--
-- return (lh_entry (lh, lib_me_t, me_lh));
--}
--
- extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize,
- ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size);
- extern int lib_fini(nal_cb_t * cb);
- extern void lib_dispatch(nal_cb_t * cb, void *private, int index,
- void *arg_block, void *ret_block);
- extern char *dispatch_name(int index);
-extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
- ptl_process_id_t pid,
- ptl_ni_limits_t *desired_limits,
- ptl_ni_limits_t *actual_limits);
-extern int lib_fini(lib_nal_t *libnal);
--
--/*
- * When the NAL detects an incoming message, it should call
- * lib_parse() decode it. The NAL callbacks will be handed
- * the private cookie as a way for the NAL to maintain state
- * about which transaction is being processed. An extra parameter,
- * lib_cookie will contain the necessary information for
- * finalizing the message.
- *
- * After it has finished the handling the message, it should
- * call lib_finalize() with the lib_cookie parameter.
- * Call backs will be made to write events, send acks or
- * replies and so on.
- * When the NAL detects an incoming message header, it should call
- * lib_parse() decode it. If the message header is garbage, lib_parse()
- * returns immediately with failure, otherwise the NAL callbacks will be
- * called to receive the message body. They are handed the private cookie
- * as a way for the NAL to maintain state about which transaction is being
- * processed. An extra parameter, lib_msg contains the lib-level message
- * state for passing to lib_finalize() when the message body has been
- * received.
-- */
- extern void lib_enq_event_locked (nal_cb_t *nal, void *private,
-extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
-- lib_eq_t *eq, ptl_event_t *ev);
- extern void lib_finalize (nal_cb_t *nal, void *private, lib_msg_t *msg,
- ptl_err_t status);
- extern void lib_parse (nal_cb_t *nal, ptl_hdr_t *hdr, void *private);
- extern lib_msg_t *lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid,
- lib_md_t *getmd);
- extern void print_hdr (nal_cb_t * nal, ptl_hdr_t * hdr);
-extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg,
- ptl_ni_fail_t ni_fail_type);
-extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid,
- lib_msg_t *get_msg);
-extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
--
--
--extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
--extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov,
-- ptl_size_t offset, ptl_size_t len);
--extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset,
-- char *src, ptl_size_t len);
--extern int lib_extract_iov (int dst_niov, struct iovec *dst,
-- int src_niov, struct iovec *src,
-- ptl_size_t offset, ptl_size_t len);
--
--extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov);
--extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
-- ptl_size_t offset, ptl_size_t len);
--extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-- char *src, ptl_size_t len);
--extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
-- int src_niov, ptl_kiov_t *src,
-- ptl_size_t offset, ptl_size_t len);
--
--extern void lib_assert_wire_constants (void);
--
- extern ptl_err_t lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-- ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
- extern ptl_err_t lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
-extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
-- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-- lib_md_t *md, ptl_size_t offset, ptl_size_t len);
--
- extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in,
- ptl_md_t * md_out);
- extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in);
- extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in);
-extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
- ptl_sr_value_t *status);
-extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid,
- unsigned long *dist);
-
-extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
- ptl_eq_handler_t callback,
- ptl_handle_eq_t *handle);
-extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
-extern int lib_api_eq_poll (nal_t *nal,
- ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
- ptl_event_t *event, int *which);
-
-extern int lib_api_me_attach(nal_t *nal,
- ptl_pt_index_t portal,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
- ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle);
-extern int lib_api_me_insert(nal_t *nal,
- ptl_handle_me_t *current_meh,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
- ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle);
-extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
-extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
-
-extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
-
-extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
-extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
-extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
- ptl_md_t *umd, ptl_unlink_t unlink,
- ptl_handle_md_t *handle);
-extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
- ptl_handle_md_t *handle);
-extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
-extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
- ptl_md_t *oldumd, ptl_md_t *newumd,
- ptl_handle_eq_t *testqh);
-
-extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh,
- ptl_process_id_t *id,
- ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits, ptl_size_t offset);
-extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
- ptl_ack_req_t ack, ptl_process_id_t *id,
- ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits,
- ptl_size_t offset, ptl_hdr_data_t hdr_data);
-extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
-
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * p30/lib-types.h
-- *
-- * Types used by the library side routines that do not need to be
-- * exposed to the user application
-- */
--
--#ifndef _LIB_TYPES_H_
--#define _LIB_TYPES_H_
-
-#include "build_check.h"
--
--#include <portals/types.h>
-#include <portals/nal.h>
--#ifdef __KERNEL__
--# include <linux/uio.h>
--# include <linux/smp_lock.h>
--# include <linux/types.h>
--#else
--# define PTL_USE_LIB_FREELIST
--# include <sys/types.h>
--#endif
-
- /* struct nal_cb_t is defined in lib-nal.h */
- typedef struct nal_cb_t nal_cb_t;
--
--typedef char *user_ptr;
--typedef struct lib_msg_t lib_msg_t;
--typedef struct lib_ptl_t lib_ptl_t;
--typedef struct lib_ac_t lib_ac_t;
--typedef struct lib_me_t lib_me_t;
--typedef struct lib_md_t lib_md_t;
--typedef struct lib_eq_t lib_eq_t;
--
--#define WIRE_ATTR __attribute__((packed))
--
--/* The wire handle's interface cookie only matches one network interface in
-- * one epoch (i.e. new cookie when the interface restarts or the node
-- * reboots). The object cookie only matches one object on that interface
-- * during that object's lifetime (i.e. no cookie re-use). */
--typedef struct {
-- __u64 wh_interface_cookie;
-- __u64 wh_object_cookie;
--} WIRE_ATTR ptl_handle_wire_t;
--
--/* byte-flip insensitive! */
--#define PTL_WIRE_HANDLE_NONE \
--((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1})
--
--typedef enum {
-- PTL_MSG_ACK = 0,
-- PTL_MSG_PUT,
-- PTL_MSG_GET,
-- PTL_MSG_REPLY,
-- PTL_MSG_HELLO,
--} ptl_msg_type_t;
--
--/* The variant fields of the portals message header are aligned on an 8
-- * byte boundary in the message header. Note that all types used in these
-- * wire structs MUST be fixed size and the smaller types are placed at the
-- * end. */
--typedef struct ptl_ack {
-- ptl_handle_wire_t dst_wmd;
-- ptl_match_bits_t match_bits;
-- ptl_size_t mlength;
--} WIRE_ATTR ptl_ack_t;
--
--typedef struct ptl_put {
-- ptl_handle_wire_t ack_wmd;
-- ptl_match_bits_t match_bits;
-- ptl_hdr_data_t hdr_data;
-- ptl_pt_index_t ptl_index;
-- ptl_size_t offset;
--} WIRE_ATTR ptl_put_t;
--
--typedef struct ptl_get {
-- ptl_handle_wire_t return_wmd;
-- ptl_match_bits_t match_bits;
-- ptl_pt_index_t ptl_index;
-- ptl_size_t src_offset;
-- ptl_size_t sink_length;
--} WIRE_ATTR ptl_get_t;
--
--typedef struct ptl_reply {
-- ptl_handle_wire_t dst_wmd;
--} WIRE_ATTR ptl_reply_t;
--
--typedef struct ptl_hello {
-- __u64 incarnation;
-- __u32 type;
--} WIRE_ATTR ptl_hello_t;
--
--typedef struct {
-- ptl_nid_t dest_nid;
-- ptl_nid_t src_nid;
-- ptl_pid_t dest_pid;
-- ptl_pid_t src_pid;
-- __u32 type; /* ptl_msg_type_t */
-- __u32 payload_length; /* payload data to follow */
-- /*<------__u64 aligned------->*/
-- union {
-- ptl_ack_t ack;
-- ptl_put_t put;
-- ptl_get_t get;
-- ptl_reply_t reply;
-- ptl_hello_t hello;
-- } msg;
--} WIRE_ATTR ptl_hdr_t;
--
--/* A HELLO message contains the portals magic number and protocol version
-- * code in the header's dest_nid, the peer's NID in the src_nid, and
-- * PTL_MSG_HELLO in the type field. All other common fields are zero
-- * (including payload_size; i.e. no payload).
-- * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is
-- * running the same protocol and to find out its NID, so that hosts with
-- * multiple IP interfaces can have a single NID. These NALs should exchange
-- * HELLO messages when a connection is first established.
-- * Individual NALs can put whatever else they fancy in ptl_hdr_t::msg.
-- */
--typedef struct {
-- __u32 magic; /* PORTALS_PROTO_MAGIC */
-- __u16 version_major; /* increment on incompatible change */
-- __u16 version_minor; /* increment on compatible change */
--} WIRE_ATTR ptl_magicversion_t;
--
--#define PORTALS_PROTO_MAGIC 0xeebc0ded
--
- #define PORTALS_PROTO_VERSION_MAJOR 0
- #define PORTALS_PROTO_VERSION_MINOR 3
-#define PORTALS_PROTO_VERSION_MAJOR 1
-#define PORTALS_PROTO_VERSION_MINOR 0
--
--typedef struct {
-- long recv_count, recv_length, send_count, send_length, drop_count,
-- drop_length, msgs_alloc, msgs_max;
--} lib_counters_t;
--
--/* temporary expedient: limit number of entries in discontiguous MDs */
- #define PTL_MTU (512<<10)
- #define PTL_MD_MAX_IOV 128
-#define PTL_MTU (1<<20)
-#define PTL_MD_MAX_IOV 256
--
--struct lib_msg_t {
-- struct list_head msg_list;
-- lib_md_t *md;
-- ptl_handle_wire_t ack_wmd;
-- ptl_event_t ev;
--};
--
--struct lib_ptl_t {
-- ptl_pt_index_t size;
-- struct list_head *tbl;
--};
--
--struct lib_ac_t {
-- int next_free;
--};
--
--typedef struct {
-- struct list_head lh_hash_chain;
-- __u64 lh_cookie;
--} lib_handle_t;
--
--#define lh_entry(ptr, type, member) \
-- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
--
--struct lib_eq_t {
-- struct list_head eq_list;
-- lib_handle_t eq_lh;
- ptl_seq_t sequence;
- ptl_size_t size;
- ptl_event_t *base;
- ptl_seq_t eq_enq_seq;
- ptl_seq_t eq_deq_seq;
- ptl_size_t eq_size;
- ptl_event_t *eq_events;
-- int eq_refcount;
- int (*event_callback) (ptl_event_t * event);
- ptl_eq_handler_t eq_callback;
-- void *eq_addrkey;
--};
--
--struct lib_me_t {
-- struct list_head me_list;
-- lib_handle_t me_lh;
-- ptl_process_id_t match_id;
-- ptl_match_bits_t match_bits, ignore_bits;
-- ptl_unlink_t unlink;
-- lib_md_t *md;
--};
--
--struct lib_md_t {
-- struct list_head md_list;
-- lib_handle_t md_lh;
-- lib_me_t *me;
-- user_ptr start;
-- ptl_size_t offset;
-- ptl_size_t length;
-- ptl_size_t max_size;
-- int threshold;
-- int pending;
- ptl_unlink_t unlink;
-- unsigned int options;
-- unsigned int md_flags;
-- void *user_ptr;
-- lib_eq_t *eq;
-- void *md_addrkey;
-- unsigned int md_niov; /* # frags */
-- union {
-- struct iovec iov[PTL_MD_MAX_IOV];
-- ptl_kiov_t kiov[PTL_MD_MAX_IOV];
-- } md_iov;
--};
--
- #define PTL_MD_FLAG_UNLINK (1 << 0)
-#define PTL_MD_FLAG_ZOMBIE (1 << 0)
-#define PTL_MD_FLAG_AUTO_UNLINK (1 << 1)
-
-static inline int lib_md_exhausted (lib_md_t *md)
-{
- return (md->threshold == 0 ||
- ((md->options & PTL_MD_MAX_SIZE) != 0 &&
- md->offset + md->max_size > md->length));
-}
--
--#ifdef PTL_USE_LIB_FREELIST
--typedef struct
--{
-- void *fl_objs; /* single contiguous array of objects */
-- int fl_nobjs; /* the number of them */
-- int fl_objsize; /* the size (including overhead) of each of them */
-- struct list_head fl_list; /* where they are enqueued */
--} lib_freelist_t;
--
--typedef struct
--{
-- struct list_head fo_list; /* enqueue on fl_list */
-- void *fo_contents; /* aligned contents */
--} lib_freeobj_t;
--#endif
--
--typedef struct {
-- /* info about peers we are trying to fail */
-- struct list_head tp_list; /* stash in ni.ni_test_peers */
-- ptl_nid_t tp_nid; /* matching nid */
-- unsigned int tp_threshold; /* # failures to simulate */
--} lib_test_peer_t;
--
--#define PTL_COOKIE_TYPE_MD 1
--#define PTL_COOKIE_TYPE_ME 2
--#define PTL_COOKIE_TYPE_EQ 3
--#define PTL_COOKIE_TYPES 4
--/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be
-- * extracted by masking with (PTL_COOKIE_TYPES - 1) */
--
- typedef struct {
- int up;
- int refcnt;
- ptl_nid_t nid;
- ptl_pid_t pid;
- int num_nodes;
- unsigned int debug;
- lib_ptl_t tbl;
- lib_ac_t ac;
- lib_counters_t counters;
-typedef struct lib_ni
-{
- nal_t *ni_api;
- ptl_process_id_t ni_pid;
- lib_ptl_t ni_portals;
- lib_counters_t ni_counters;
- ptl_ni_limits_t ni_actual_limits;
--
-- int ni_lh_hash_size; /* size of lib handle hash table */
-- struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */
-- __u64 ni_next_object_cookie; /* cookie generator */
-- __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */
--
- struct list_head ni_test_peers;
- struct list_head ni_test_peers;
--
--#ifdef PTL_USE_LIB_FREELIST
- lib_freelist_t ni_free_mes;
- lib_freelist_t ni_free_msgs;
- lib_freelist_t ni_free_mds;
- lib_freelist_t ni_free_eqs;
- lib_freelist_t ni_free_mes;
- lib_freelist_t ni_free_msgs;
- lib_freelist_t ni_free_mds;
- lib_freelist_t ni_free_eqs;
--#endif
- struct list_head ni_active_msgs;
- struct list_head ni_active_mds;
- struct list_head ni_active_eqs;
-
- struct list_head ni_active_msgs;
- struct list_head ni_active_mds;
- struct list_head ni_active_eqs;
-
-#ifdef __KERNEL__
- spinlock_t ni_lock;
- wait_queue_head_t ni_waitq;
-#else
- pthread_mutex_t ni_mutex;
- pthread_cond_t ni_cond;
-#endif
--} lib_ni_t;
-
-
-typedef struct lib_nal
-{
- /* lib-level interface state */
- lib_ni_t libnal_ni;
-
- /* NAL-private data */
- void *libnal_data;
-
- /*
- * send: Sends a preformatted header and payload data to a
- * specified remote process. The payload is scattered over 'niov'
- * fragments described by iov, starting at 'offset' for 'mlen'
- * bytes.
- * NB the NAL may NOT overwrite iov.
- * PTL_OK on success => NAL has committed to send and will call
- * lib_finalize on completion
- */
- ptl_err_t (*libnal_send)
- (struct lib_nal *nal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen);
-
- /* as send, but with a set of page fragments (NULL if not supported) */
- ptl_err_t (*libnal_send_pages)
- (struct lib_nal *nal, void *private, lib_msg_t * cookie,
- ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, ptl_kiov_t *iov,
- size_t offset, size_t mlen);
- /*
- * recv: Receives an incoming message from a remote process. The
- * payload is to be received into the scattered buffer of 'niov'
- * fragments described by iov, starting at 'offset' for 'mlen'
- * bytes. Payload bytes after 'mlen' up to 'rlen' are to be
- * discarded.
- * NB the NAL may NOT overwrite iov.
- * PTL_OK on success => NAL has committed to receive and will call
- * lib_finalize on completion
- */
- ptl_err_t (*libnal_recv)
- (struct lib_nal *nal, void *private, lib_msg_t * cookie,
- unsigned int niov, struct iovec *iov,
- size_t offset, size_t mlen, size_t rlen);
-
- /* as recv, but with a set of page fragments (NULL if not supported) */
- ptl_err_t (*libnal_recv_pages)
- (struct lib_nal *nal, void *private, lib_msg_t * cookie,
- unsigned int niov, ptl_kiov_t *iov,
- size_t offset, size_t mlen, size_t rlen);
-
- /*
- * (un)map: Tell the NAL about some memory it will access.
- * *addrkey passed to libnal_unmap() is what libnal_map() set it to.
- * type of *iov depends on options.
- * Set to NULL if not required.
- */
- ptl_err_t (*libnal_map)
- (struct lib_nal *nal, unsigned int niov, struct iovec *iov,
- void **addrkey);
- void (*libnal_unmap)
- (struct lib_nal *nal, unsigned int niov, struct iovec *iov,
- void **addrkey);
-
- /* as (un)map, but with a set of page fragments */
- ptl_err_t (*libnal_map_pages)
- (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov,
- void **addrkey);
- void (*libnal_unmap_pages)
- (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov,
- void **addrkey);
-
- void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...);
-
- /* Calculate a network "distance" to given node */
- int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist);
-} lib_nal_t;
--
--#endif
+++ /dev/null
--#ifndef _LINUX_LIST_H
--/*
-- * Simple doubly linked list implementation.
-- *
-- * Some of the internal functions ("__xxx") are useful when
-- * manipulating whole lists rather than single entries, as
-- * sometimes we already know the next/prev entries and we can
-- * generate better code by using them directly rather than
-- * using the generic single-entry routines.
-- */
--
--struct list_head {
-- struct list_head *next, *prev;
--};
--
--typedef struct list_head list_t;
--
--#define LIST_HEAD_INIT(name) { &(name), &(name) }
--
--#define LIST_HEAD(name) \
-- struct list_head name = LIST_HEAD_INIT(name)
--
--#define INIT_LIST_HEAD(ptr) do { \
-- (ptr)->next = (ptr); (ptr)->prev = (ptr); \
--} while (0)
--
--/*
-- * Insert a new entry between two known consecutive entries.
-- *
-- * This is only for internal list manipulation where we know
-- * the prev/next entries already!
-- */
--static inline void __list_add(struct list_head * new,
-- struct list_head * prev,
-- struct list_head * next)
--{
-- next->prev = new;
-- new->next = next;
-- new->prev = prev;
-- prev->next = new;
--}
--
--/**
-- * list_add - add a new entry
-- * @new: new entry to be added
-- * @head: list head to add it after
-- *
-- * Insert a new entry after the specified head.
-- * This is good for implementing stacks.
-- */
--static inline void list_add(struct list_head *new, struct list_head *head)
--{
-- __list_add(new, head, head->next);
--}
--
--/**
-- * list_add_tail - add a new entry
-- * @new: new entry to be added
-- * @head: list head to add it before
-- *
-- * Insert a new entry before the specified head.
-- * This is useful for implementing queues.
-- */
--static inline void list_add_tail(struct list_head *new, struct list_head *head)
--{
-- __list_add(new, head->prev, head);
--}
--
--/*
-- * Delete a list entry by making the prev/next entries
-- * point to each other.
-- *
-- * This is only for internal list manipulation where we know
-- * the prev/next entries already!
-- */
--static inline void __list_del(struct list_head * prev, struct list_head * next)
--{
-- next->prev = prev;
-- prev->next = next;
--}
--
--/**
-- * list_del - deletes entry from list.
-- * @entry: the element to delete from the list.
-- * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
-- */
--static inline void list_del(struct list_head *entry)
--{
-- __list_del(entry->prev, entry->next);
--}
--
--/**
-- * list_del_init - deletes entry from list and reinitialize it.
-- * @entry: the element to delete from the list.
-- */
--static inline void list_del_init(struct list_head *entry)
--{
-- __list_del(entry->prev, entry->next);
-- INIT_LIST_HEAD(entry);
--}
--#endif
--
--#ifndef list_for_each_entry
--/**
-- * list_move - delete from one list and add as another's head
-- * @list: the entry to move
-- * @head: the head that will precede our entry
-- */
--static inline void list_move(struct list_head *list, struct list_head *head)
--{
-- __list_del(list->prev, list->next);
-- list_add(list, head);
--}
--
--/**
-- * list_move_tail - delete from one list and add as another's tail
-- * @list: the entry to move
-- * @head: the head that will follow our entry
-- */
--static inline void list_move_tail(struct list_head *list,
-- struct list_head *head)
--{
-- __list_del(list->prev, list->next);
-- list_add_tail(list, head);
--}
--#endif
--
--#ifndef _LINUX_LIST_H
--#define _LINUX_LIST_H
--/**
-- * list_empty - tests whether a list is empty
-- * @head: the list to test.
-- */
--static inline int list_empty(struct list_head *head)
--{
-- return head->next == head;
--}
--
--static inline void __list_splice(struct list_head *list,
-- struct list_head *head)
--{
-- struct list_head *first = list->next;
-- struct list_head *last = list->prev;
-- struct list_head *at = head->next;
--
-- first->prev = head;
-- head->next = first;
--
-- last->next = at;
-- at->prev = last;
--}
--
--/**
-- * list_splice - join two lists
-- * @list: the new list to add.
-- * @head: the place to add it in the first list.
-- */
--static inline void list_splice(struct list_head *list, struct list_head *head)
--{
-- if (!list_empty(list))
-- __list_splice(list, head);
--}
--
--/**
-- * list_splice_init - join two lists and reinitialise the emptied list.
-- * @list: the new list to add.
-- * @head: the place to add it in the first list.
-- *
-- * The list at @list is reinitialised
-- */
--static inline void list_splice_init(struct list_head *list,
-- struct list_head *head)
--{
-- if (!list_empty(list)) {
-- __list_splice(list, head);
-- INIT_LIST_HEAD(list);
-- }
--}
--
--/**
-- * list_entry - get the struct for this entry
-- * @ptr: the &struct list_head pointer.
-- * @type: the type of the struct this is embedded in.
-- * @member: the name of the list_struct within the struct.
-- */
--#define list_entry(ptr, type, member) \
-- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
--
--/**
-- * list_for_each - iterate over a list
-- * @pos: the &struct list_head to use as a loop counter.
-- * @head: the head for your list.
-- */
--#define list_for_each(pos, head) \
-- for (pos = (head)->next ; pos != (head); pos = pos->next )
--
--/**
-- * list_for_each_prev - iterate over a list in reverse order
-- * @pos: the &struct list_head to use as a loop counter.
-- * @head: the head for your list.
-- */
--#define list_for_each_prev(pos, head) \
-- for (pos = (head)->prev ; pos != (head); pos = pos->prev)
--
--/**
-- * list_for_each_safe - iterate over a list safe against removal of list entry
-- * @pos: the &struct list_head to use as a loop counter.
-- * @n: another &struct list_head to use as temporary storage
-- * @head: the head for your list.
-- */
--#define list_for_each_safe(pos, n, head) \
-- for (pos = (head)->next, n = pos->next; pos != (head); \
-- pos = n, n = pos->next)
--
--#endif
--
--#ifndef list_for_each_entry
--/**
-- * list_for_each_entry - iterate over list of given type
-- * @pos: the type * to use as a loop counter.
-- * @head: the head for your list.
-- * @member: the name of the list_struct within the struct.
-- */
--#define list_for_each_entry(pos, head, member) \
-- for (pos = list_entry((head)->next, typeof(*pos), member); \
-- &pos->member != (head); \
-- pos = list_entry(pos->member.next, typeof(*pos), member))
--#endif
--
--#ifndef list_for_each_entry_safe
--/**
-- * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
-- * @pos: the type * to use as a loop counter.
-- * @n: another type * to use as temporary storage
-- * @head: the head for your list.
-- * @member: the name of the list_struct within the struct.
-- */
--#define list_for_each_entry_safe(pos, n, head, member) \
-- for (pos = list_entry((head)->next, typeof(*pos), member), \
-- n = list_entry(pos->member.next, typeof(*pos), member); \
-- &pos->member != (head); \
-- pos = n, n = list_entry(n->member.next, typeof(*n), member))
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Compile with:
-- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
-- */
--#ifndef __LTRACE_H_
--#define __LTRACE_H_
--
--#include <stdio.h>
--#include <stdlib.h>
--#include <getopt.h>
--#include <string.h>
--#include <errno.h>
--#include <sys/types.h>
--#include <sys/stat.h>
--#include <fcntl.h>
--#include <unistd.h>
--#include <sys/time.h>
--#include <portals/types.h>
--#include <linux/kp30.h>
--#include <portals/ptlctl.h>
--#include <linux/limits.h>
--#include <asm/page.h>
--#include <linux/version.h>
--
--static inline int ltrace_write_file(char* fname)
--{
-- char* argv[3];
--
-- argv[0] = "debug_kernel";
-- argv[1] = fname;
-- argv[2] = "1";
--
-- fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
--
-- return jt_dbg_debug_kernel(3, argv);
--}
--
--static inline int ltrace_clear()
--{
-- char* argv[1];
--
-- argv[0] = "clear";
--
-- fprintf(stderr, "[ptlctl] %s\n", argv[0]);
--
-- return jt_dbg_clear_debug_buf(1, argv);
--}
--
--static inline int ltrace_mark(int indent_level, char* text)
--{
-- char* argv[2];
-- char mark_buf[PATH_MAX];
--
-- snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
--
-- argv[0] = "mark";
-- argv[1] = mark_buf;
-- return jt_dbg_mark_debug_buf(2, argv);
--}
--
--static inline int ltrace_applymasks()
--{
-- char* argv[2];
-- argv[0] = "list";
-- argv[1] = "applymasks";
--
-- fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
--
-- return jt_dbg_list(2, argv);
--}
--
--
--static inline int ltrace_filter(char* subsys_or_mask)
--{
-- char* argv[2];
-- argv[0] = "filter";
-- argv[1] = subsys_or_mask;
-- return jt_dbg_filter(2, argv);
--}
--
--static inline int ltrace_show(char* subsys_or_mask)
--{
-- char* argv[2];
-- argv[0] = "show";
-- argv[1] = subsys_or_mask;
-- return jt_dbg_show(2, argv);
--}
--
--static inline int ltrace_start()
--{
-- int rc = 0;
-- dbg_initialize(0, NULL);
--#ifdef PORTALS_DEV_ID
-- rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
--#endif
-- ltrace_filter("class");
-- ltrace_filter("socknal");
-- ltrace_filter("qswnal");
-- ltrace_filter("gmnal");
-- ltrace_filter("portals");
--
-- ltrace_show("all_types");
-- ltrace_filter("trace");
-- ltrace_filter("malloc");
-- ltrace_filter("net");
-- ltrace_filter("page");
-- ltrace_filter("other");
-- ltrace_filter("info");
-- ltrace_applymasks();
--
-- return rc;
--}
--
--
--static inline void ltrace_stop()
--{
--#ifdef PORTALS_DEV_ID
-- unregister_ioc_dev(PORTALS_DEV_ID);
--#endif
--}
--
--static inline int not_uml()
--{
-- /* Return Values:
-- * 0 when run under UML
-- * 1 when run on host
-- * <0 when lookup failed
-- */
-- struct stat buf;
-- int rc = stat("/dev/ubd", &buf);
-- rc = ((rc<0) && (errno == ENOENT)) ? 1 : rc;
-- if (rc<0) {
-- fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno));
-- rc = 1; /* Assume host */
-- }
-- return rc;
--}
--
--#define LTRACE_MAX_NOB 256
--static inline void ltrace_add_processnames(char* fname)
--{
-- char cmdbuf[LTRACE_MAX_NOB];
-- struct timeval tv;
-- struct timezone tz;
-- int nob;
-- int underuml = !not_uml();
--
-- gettimeofday(&tv, &tz);
--
-- nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
--
-- /* Careful - these format strings need to match the CDEBUG
-- * formats in portals/linux/debug.c EXACTLY
-- */
-- nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ",
-- S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec);
--
-- if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) {
-- nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB,
-- "(%s:%d:%s() %d | %d+%lu): ",
-- "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L);
-- }
-- else {
-- nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB,
-- "(%s:%d:%s() %d+%lu): ",
-- "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
-- }
--
-- nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
-- system(cmdbuf);
--}
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#ifndef _P30_H_
--#define _P30_H_
-
-#include "build_check.h"
--
--/*
-- * p30.h
-- *
-- * User application interface file
-- */
--
--#if defined (__KERNEL__)
--#include <linux/uio.h>
--#include <linux/types.h>
--#else
--#include <sys/types.h>
--#include <sys/uio.h>
--#endif
--
--#include <portals/types.h>
- #include <portals/nal.h>
--#include <portals/api.h>
- #include <portals/nalids.h>
-
- extern int __p30_initialized; /* for libraries & test codes */
- extern int __p30_myr_initialized; /* that don't know if p30 */
- extern int __p30_ip_initialized; /* had been initialized yet */
- extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
- extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
- extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
-
- /*
- * Debugging flags reserved for the Portals reference library.
- * These are not part of the API as described in the SAND report
- * but are for the use of the maintainers of the reference implementation.
- *
- * It is not expected that the real implementations will export
- * this functionality.
- */
- #define PTL_DEBUG_NONE 0ul
- #define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */
-
- #define __bit(x) ((unsigned long) 1<<(x))
- #define PTL_DEBUG_PUT __bit(0)
- #define PTL_DEBUG_GET __bit(1)
- #define PTL_DEBUG_REPLY __bit(2)
- #define PTL_DEBUG_ACK __bit(3)
- #define PTL_DEBUG_DROP __bit(4)
- #define PTL_DEBUG_REQUEST __bit(5)
- #define PTL_DEBUG_DELIVERY __bit(6)
- #define PTL_DEBUG_UNLINK __bit(7)
- #define PTL_DEBUG_THRESHOLD __bit(8)
- #define PTL_DEBUG_API __bit(9)
-
- /*
- * These eight are reserved for the NAL to define
- * It should probably give them better names...
- */
- #define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */
- #define PTL_DEBUG_NI0 __bit(24)
- #define PTL_DEBUG_NI1 __bit(25)
- #define PTL_DEBUG_NI2 __bit(26)
- #define PTL_DEBUG_NI3 __bit(27)
- #define PTL_DEBUG_NI4 __bit(28)
- #define PTL_DEBUG_NI5 __bit(29)
- #define PTL_DEBUG_NI6 __bit(30)
- #define PTL_DEBUG_NI7 __bit(31)
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- * header for libptlctl.a
-- */
--#ifndef _PTLCTL_H_
--#define _PTLCTL_H_
-
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
--
--#define PORTALS_DEV_ID 0
--#define PORTALS_DEV_PATH "/dev/portals"
--#define OBD_DEV_ID 1
--#define OBD_DEV_PATH "/dev/obd"
-#define SMFS_DEV_ID 2
-#define SMFS_DEV_PATH "/dev/snapdev"
--
--int ptl_name2nal(char *str);
--int ptl_parse_ipaddr (__u32 *ipaddrp, char *str);
--int ptl_parse_nid (ptl_nid_t *nidp, char *str);
--char * ptl_nid2str (char *buffer, ptl_nid_t nid);
--
--int ptl_initialize(int argc, char **argv);
--int jt_ptl_network(int argc, char **argv);
--int jt_ptl_print_autoconnects (int argc, char **argv);
--int jt_ptl_add_autoconnect (int argc, char **argv);
--int jt_ptl_del_autoconnect (int argc, char **argv);
-int jt_ptl_print_interfaces(int argc, char **argv);
-int jt_ptl_add_interface(int argc, char **argv);
-int jt_ptl_del_interface(int argc, char **argv);
-int jt_ptl_print_peers (int argc, char **argv);
-int jt_ptl_add_peer (int argc, char **argv);
-int jt_ptl_del_peer (int argc, char **argv);
--int jt_ptl_print_connections (int argc, char **argv);
--int jt_ptl_connect(int argc, char **argv);
--int jt_ptl_disconnect(int argc, char **argv);
--int jt_ptl_push_connection(int argc, char **argv);
--int jt_ptl_print_active_txs(int argc, char **argv);
--int jt_ptl_ping(int argc, char **argv);
--int jt_ptl_shownid(int argc, char **argv);
--int jt_ptl_mynid(int argc, char **argv);
--int jt_ptl_add_uuid(int argc, char **argv);
--int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
--int jt_ptl_close_uuid(int argc, char **argv);
--int jt_ptl_del_uuid(int argc, char **argv);
--int jt_ptl_rxmem (int argc, char **argv);
--int jt_ptl_txmem (int argc, char **argv);
--int jt_ptl_nagle (int argc, char **argv);
--int jt_ptl_add_route (int argc, char **argv);
--int jt_ptl_del_route (int argc, char **argv);
--int jt_ptl_notify_router (int argc, char **argv);
--int jt_ptl_print_routes (int argc, char **argv);
--int jt_ptl_fail_nid (int argc, char **argv);
--int jt_ptl_lwt(int argc, char **argv);
--int jt_ptl_memhog(int argc, char **argv);
--
--int dbg_initialize(int argc, char **argv);
--int jt_dbg_filter(int argc, char **argv);
--int jt_dbg_show(int argc, char **argv);
--int jt_dbg_list(int argc, char **argv);
--int jt_dbg_debug_kernel(int argc, char **argv);
--int jt_dbg_debug_daemon(int argc, char **argv);
--int jt_dbg_debug_file(int argc, char **argv);
--int jt_dbg_clear_debug_buf(int argc, char **argv);
--int jt_dbg_mark_debug_buf(int argc, char **argv);
--int jt_dbg_modules(int argc, char **argv);
--int jt_dbg_panic(int argc, char **argv);
--
--int ptl_set_cfg_record_cb(cfg_record_cb_t cb);
--
--/* l_ioctl.c */
- typedef int (ioc_handler_t)(int dev_id, int opc, void *buf);
-typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf);
--void set_ioc_handler(ioc_handler_t *handler);
--int register_ioc_dev(int dev_id, const char * dev_name);
--void unregister_ioc_dev(int dev_id);
--int set_ioctl_dump(char * file);
- int l_ioctl(int dev_id, int opc, void *buf);
- int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *));
-int l_ioctl(int dev_id, unsigned int opc, void *buf);
-int parse_dump(char * dump_file, ioc_handler_t ioc_func);
--int jt_ioc_dump(int argc, char **argv);
-extern char *dump_filename;
-int dump(int dev_id, unsigned int opc, void *buf);
--
--#endif
+++ /dev/null
--#ifndef MYRNAL_H
--#define MYRNAL_H
--
--#define MAX_ARGS_LEN (256)
--#define MAX_RET_LEN (128)
--#define MYRNAL_MAX_ACL_SIZE (64)
--#define MYRNAL_MAX_PTL_SIZE (64)
--
--#define P3CMD (100)
--#define P3SYSCALL (200)
--#define P3REGISTER (300)
--
--enum { PTL_MLOCKALL };
--
--typedef struct {
-- void *args;
-- size_t args_len;
-- void *ret;
-- size_t ret_len;
-- int p3cmd;
--} myrnal_forward_t;
--
--#endif /* MYRNAL_H */
+++ /dev/null
--#ifndef _NAL_H_
--#define _NAL_H_
-
-#include "build_check.h"
--
--/*
-- * p30/nal.h
-- *
-- * The API side NAL declarations
-- */
--
--#include <portals/types.h>
-
- #ifdef yield
- #undef yield
- #endif
--
--typedef struct nal_t nal_t;
--
--struct nal_t {
- ptl_ni_t ni;
- int refct;
- void *nal_data;
- int *timeout; /* for libp30api users */
- int (*forward) (nal_t * nal, int index, /* Function ID */
- void *args, size_t arg_len, void *ret, size_t ret_len);
- /* common interface state */
- int nal_refct;
- ptl_handle_ni_t nal_handle;
--
- int (*shutdown) (nal_t * nal, int interface);
- /* NAL-private data */
- void *nal_data;
--
- int (*validate) (nal_t * nal, void *base, size_t extent);
- /* NAL API implementation
- * NB only nal_ni_init needs to be set when the NAL registers itself */
- int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
-
- void (*nal_ni_fini) (nal_t *nal);
--
- void (*yield) (nal_t * nal);
- int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id);
- int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t register, ptl_sr_value_t *status);
- int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance);
- int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold);
--
- void (*lock) (nal_t * nal, unsigned long *flags);
- int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle);
- int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle);
- int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me);
-
- int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me,
- ptl_md_t *md, ptl_unlink_t unlink,
- ptl_handle_md_t *handle);
- int (*nal_md_bind) (nal_t *nal,
- ptl_md_t *md, ptl_unlink_t unlink,
- ptl_handle_md_t *handle);
- int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md);
- int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md,
- ptl_md_t *old_md, ptl_md_t *new_md,
- ptl_handle_eq_t *testq);
--
- void (*unlock) (nal_t * nal, unsigned long *flags);
- };
- int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count,
- ptl_eq_handler_t handler,
- ptl_handle_eq_t *handle);
- int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq);
- int (*nal_eq_poll) (nal_t *nal,
- ptl_handle_eq_t *eqs, int neqs, int timeout,
- ptl_event_t *event, int *which);
--
- typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
- extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
- extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid);
- int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index,
- ptl_process_id_t match_id, ptl_pt_index_t portal);
-
- int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack,
- ptl_process_id_t *target, ptl_pt_index_t portal,
- ptl_ac_index_t ac, ptl_match_bits_t match,
- ptl_size_t offset, ptl_hdr_data_t hdr_data);
- int (*nal_get) (nal_t *nal, ptl_handle_md_t *md,
- ptl_process_id_t *target, ptl_pt_index_t portal,
- ptl_ac_index_t ac, ptl_match_bits_t match,
- ptl_size_t offset);
-};
--
- extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any);
-extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any);
--
- #ifndef PTL_IFACE_DEFAULT
- #define PTL_IFACE_DEFAULT (PTL_IFACE_IP)
-#ifdef __KERNEL__
-extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal);
-extern void ptl_unregister_nal(ptl_interface_t interface);
--#endif
--
--#endif
+++ /dev/null
- #define PTL_IFACE_TCP 1
- #define PTL_IFACE_ER 2
- #define PTL_IFACE_SS 3
- #define PTL_IFACE_MAX 4
-#include "build_check.h"
-
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#ifndef _P30_H_
--#define _P30_H_
-
-#include "build_check.h"
--
--/*
-- * p30.h
-- *
-- * User application interface file
-- */
--
--#if defined (__KERNEL__)
--#include <linux/uio.h>
--#include <linux/types.h>
--#else
--#include <sys/types.h>
--#include <sys/uio.h>
--#endif
--
--#include <portals/types.h>
- #include <portals/nal.h>
--#include <portals/api.h>
- #include <portals/nalids.h>
-
- extern int __p30_initialized; /* for libraries & test codes */
- extern int __p30_myr_initialized; /* that don't know if p30 */
- extern int __p30_ip_initialized; /* had been initialized yet */
- extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle;
-
- extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */
- extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */
-
- /*
- * Debugging flags reserved for the Portals reference library.
- * These are not part of the API as described in the SAND report
- * but are for the use of the maintainers of the reference implementation.
- *
- * It is not expected that the real implementations will export
- * this functionality.
- */
- #define PTL_DEBUG_NONE 0ul
- #define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */
-
- #define __bit(x) ((unsigned long) 1<<(x))
- #define PTL_DEBUG_PUT __bit(0)
- #define PTL_DEBUG_GET __bit(1)
- #define PTL_DEBUG_REPLY __bit(2)
- #define PTL_DEBUG_ACK __bit(3)
- #define PTL_DEBUG_DROP __bit(4)
- #define PTL_DEBUG_REQUEST __bit(5)
- #define PTL_DEBUG_DELIVERY __bit(6)
- #define PTL_DEBUG_UNLINK __bit(7)
- #define PTL_DEBUG_THRESHOLD __bit(8)
- #define PTL_DEBUG_API __bit(9)
-
- /*
- * These eight are reserved for the NAL to define
- * It should probably give them better names...
- */
- #define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */
- #define PTL_DEBUG_NI0 __bit(24)
- #define PTL_DEBUG_NI1 __bit(25)
- #define PTL_DEBUG_NI2 __bit(26)
- #define PTL_DEBUG_NI3 __bit(27)
- #define PTL_DEBUG_NI4 __bit(28)
- #define PTL_DEBUG_NI5 __bit(29)
- #define PTL_DEBUG_NI6 __bit(30)
- #define PTL_DEBUG_NI7 __bit(31)
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- * header for libptlctl.a
-- */
--#ifndef _PTLCTL_H_
--#define _PTLCTL_H_
-
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
--
--#define PORTALS_DEV_ID 0
--#define PORTALS_DEV_PATH "/dev/portals"
--#define OBD_DEV_ID 1
--#define OBD_DEV_PATH "/dev/obd"
-#define SMFS_DEV_ID 2
-#define SMFS_DEV_PATH "/dev/snapdev"
--
--int ptl_name2nal(char *str);
--int ptl_parse_ipaddr (__u32 *ipaddrp, char *str);
--int ptl_parse_nid (ptl_nid_t *nidp, char *str);
--char * ptl_nid2str (char *buffer, ptl_nid_t nid);
--
--int ptl_initialize(int argc, char **argv);
--int jt_ptl_network(int argc, char **argv);
--int jt_ptl_print_autoconnects (int argc, char **argv);
--int jt_ptl_add_autoconnect (int argc, char **argv);
--int jt_ptl_del_autoconnect (int argc, char **argv);
-int jt_ptl_print_interfaces(int argc, char **argv);
-int jt_ptl_add_interface(int argc, char **argv);
-int jt_ptl_del_interface(int argc, char **argv);
-int jt_ptl_print_peers (int argc, char **argv);
-int jt_ptl_add_peer (int argc, char **argv);
-int jt_ptl_del_peer (int argc, char **argv);
--int jt_ptl_print_connections (int argc, char **argv);
--int jt_ptl_connect(int argc, char **argv);
--int jt_ptl_disconnect(int argc, char **argv);
--int jt_ptl_push_connection(int argc, char **argv);
--int jt_ptl_print_active_txs(int argc, char **argv);
--int jt_ptl_ping(int argc, char **argv);
--int jt_ptl_shownid(int argc, char **argv);
--int jt_ptl_mynid(int argc, char **argv);
--int jt_ptl_add_uuid(int argc, char **argv);
--int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */
--int jt_ptl_close_uuid(int argc, char **argv);
--int jt_ptl_del_uuid(int argc, char **argv);
--int jt_ptl_rxmem (int argc, char **argv);
--int jt_ptl_txmem (int argc, char **argv);
--int jt_ptl_nagle (int argc, char **argv);
--int jt_ptl_add_route (int argc, char **argv);
--int jt_ptl_del_route (int argc, char **argv);
--int jt_ptl_notify_router (int argc, char **argv);
--int jt_ptl_print_routes (int argc, char **argv);
--int jt_ptl_fail_nid (int argc, char **argv);
--int jt_ptl_lwt(int argc, char **argv);
--int jt_ptl_memhog(int argc, char **argv);
--
--int dbg_initialize(int argc, char **argv);
--int jt_dbg_filter(int argc, char **argv);
--int jt_dbg_show(int argc, char **argv);
--int jt_dbg_list(int argc, char **argv);
--int jt_dbg_debug_kernel(int argc, char **argv);
--int jt_dbg_debug_daemon(int argc, char **argv);
--int jt_dbg_debug_file(int argc, char **argv);
--int jt_dbg_clear_debug_buf(int argc, char **argv);
--int jt_dbg_mark_debug_buf(int argc, char **argv);
--int jt_dbg_modules(int argc, char **argv);
--int jt_dbg_panic(int argc, char **argv);
--
--int ptl_set_cfg_record_cb(cfg_record_cb_t cb);
--
--/* l_ioctl.c */
- typedef int (ioc_handler_t)(int dev_id, int opc, void *buf);
-typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf);
--void set_ioc_handler(ioc_handler_t *handler);
--int register_ioc_dev(int dev_id, const char * dev_name);
--void unregister_ioc_dev(int dev_id);
--int set_ioctl_dump(char * file);
- int l_ioctl(int dev_id, int opc, void *buf);
- int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *));
-int l_ioctl(int dev_id, unsigned int opc, void *buf);
-int parse_dump(char * dump_file, ioc_handler_t ioc_func);
--int jt_ioc_dump(int argc, char **argv);
-extern char *dump_filename;
-int dump(int dev_id, unsigned int opc, void *buf);
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * <portals/socknal.h>
-- *
-- * #defines shared between socknal implementation and utilities
-- */
--
--#define SOCKNAL_CONN_NONE (-1)
--#define SOCKNAL_CONN_ANY 0
--#define SOCKNAL_CONN_CONTROL 1
--#define SOCKNAL_CONN_BULK_IN 2
--#define SOCKNAL_CONN_BULK_OUT 3
--#define SOCKNAL_CONN_NTYPES 4
+++ /dev/null
--/*
-- * stringtab.h
-- */
+++ /dev/null
--#ifndef _P30_TYPES_H_
--#define _P30_TYPES_H_
-
- #include <asm/types.h>
--
- #ifdef __KERNEL__
- # include <linux/time.h>
- # include <asm/timex.h>
- #else
- # include <sys/time.h>
- # define do_gettimeofday(tv) gettimeofday(tv, NULL);
- typedef unsigned long long cycles_t;
- #endif
-#include "build_check.h"
--
-#include <linux/libcfs.h>
--#include <portals/errno.h>
-
-/* This implementation uses the same type for API function return codes and
- * the completion status in an event */
-#define PTL_NI_OK PTL_OK
-typedef ptl_err_t ptl_ni_fail_t;
--
-typedef __u32 ptl_uid_t;
-typedef __u32 ptl_jid_t;
--typedef __u64 ptl_nid_t;
--typedef __u32 ptl_pid_t;
--typedef __u32 ptl_pt_index_t;
--typedef __u32 ptl_ac_index_t;
--typedef __u64 ptl_match_bits_t;
--typedef __u64 ptl_hdr_data_t;
--typedef __u32 ptl_size_t;
-
-#define PTL_TIME_FOREVER (-1)
--
--typedef struct {
-- unsigned long nal_idx; /* which network interface */
-- __u64 cookie; /* which thing on that interface */
--} ptl_handle_any_t;
--
--typedef ptl_handle_any_t ptl_handle_ni_t;
--typedef ptl_handle_any_t ptl_handle_eq_t;
--typedef ptl_handle_any_t ptl_handle_md_t;
--typedef ptl_handle_any_t ptl_handle_me_t;
--
- #define PTL_HANDLE_NONE \
-#define PTL_INVALID_HANDLE \
-- ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
- #define PTL_EQ_NONE PTL_HANDLE_NONE
-#define PTL_EQ_NONE PTL_INVALID_HANDLE
--
- static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
-static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
--{
-- return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
--}
--
-#define PTL_UID_ANY ((ptl_uid_t) -1)
-#define PTL_JID_ANY ((ptl_jid_t) -1)
--#define PTL_NID_ANY ((ptl_nid_t) -1)
--#define PTL_PID_ANY ((ptl_pid_t) -1)
--
--typedef struct {
-- ptl_nid_t nid;
-- ptl_pid_t pid; /* node id / process id */
--} ptl_process_id_t;
--
--typedef enum {
-- PTL_RETAIN = 0,
-- PTL_UNLINK
--} ptl_unlink_t;
--
--typedef enum {
-- PTL_INS_BEFORE,
-- PTL_INS_AFTER
--} ptl_ins_pos_t;
-
- typedef struct {
- struct page *kiov_page;
- unsigned int kiov_len;
- unsigned int kiov_offset;
- } ptl_kiov_t;
--
--typedef struct {
-- void *start;
-- ptl_size_t length;
-- int threshold;
-- int max_size;
-- unsigned int options;
-- void *user_ptr;
- ptl_handle_eq_t eventq;
- unsigned int niov;
- ptl_handle_eq_t eq_handle;
--} ptl_md_t;
--
--/* Options for the MD structure */
- #define PTL_MD_OP_PUT (1 << 0)
- #define PTL_MD_OP_GET (1 << 1)
- #define PTL_MD_MANAGE_REMOTE (1 << 2)
- #define PTL_MD_AUTO_UNLINK (1 << 3)
- #define PTL_MD_TRUNCATE (1 << 4)
- #define PTL_MD_ACK_DISABLE (1 << 5)
- #define PTL_MD_IOV (1 << 6)
- #define PTL_MD_MAX_SIZE (1 << 7)
- #define PTL_MD_KIOV (1 << 8)
-#define PTL_MD_OP_PUT (1 << 0)
-#define PTL_MD_OP_GET (1 << 1)
-#define PTL_MD_MANAGE_REMOTE (1 << 2)
-/* unused (1 << 3) */
-#define PTL_MD_TRUNCATE (1 << 4)
-#define PTL_MD_ACK_DISABLE (1 << 5)
-#define PTL_MD_IOVEC (1 << 6)
-#define PTL_MD_MAX_SIZE (1 << 7)
-#define PTL_MD_KIOV (1 << 8)
-#define PTL_MD_EVENT_START_DISABLE (1 << 9)
-#define PTL_MD_EVENT_END_DISABLE (1 << 10)
-
-/* For compatibility with Cray Portals */
-#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
-#define PTL_MD_PHYS 0
--
--#define PTL_MD_THRESH_INF (-1)
-
-/* NB lustre portals uses struct iovec internally! */
-typedef struct iovec ptl_md_iovec_t;
-
-typedef struct {
- struct page *kiov_page;
- unsigned int kiov_len;
- unsigned int kiov_offset;
-} ptl_kiov_t;
--
--typedef enum {
- PTL_EVENT_GET,
- PTL_EVENT_PUT,
- PTL_EVENT_REPLY,
- PTL_EVENT_GET_START,
- PTL_EVENT_GET_END,
-
- PTL_EVENT_PUT_START,
- PTL_EVENT_PUT_END,
-
- PTL_EVENT_REPLY_START,
- PTL_EVENT_REPLY_END,
-
-- PTL_EVENT_ACK,
- PTL_EVENT_SENT,
-
- PTL_EVENT_SEND_START,
- PTL_EVENT_SEND_END,
-
-- PTL_EVENT_UNLINK,
--} ptl_event_kind_t;
--
--#define PTL_SEQ_BASETYPE long
--typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t;
--#define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0)
--
--/* XXX
-- * cygwin need the pragma line, not clear if it's needed in other places.
-- * checking!!!
-- */
--#ifdef __CYGWIN__
--#pragma pack(push, 4)
--#endif
--typedef struct {
-- ptl_event_kind_t type;
- ptl_err_t status;
- int unlinked;
-- ptl_process_id_t initiator;
- ptl_pt_index_t portal;
- ptl_uid_t uid;
- ptl_jid_t jid;
- ptl_pt_index_t pt_index;
-- ptl_match_bits_t match_bits;
-- ptl_size_t rlength;
- ptl_size_t mlength;
- ptl_size_t offset;
- ptl_md_t mem_desc;
- ptl_size_t mlength;
- ptl_size_t offset;
- ptl_handle_md_t md_handle;
- ptl_md_t md;
-- ptl_hdr_data_t hdr_data;
- struct timeval arrival_time;
- ptl_seq_t link;
- ptl_ni_fail_t ni_fail_type;
-
- int unlinked;
--
-- volatile ptl_seq_t sequence;
--} ptl_event_t;
--#ifdef __CYGWIN__
--#pragma pop
--#endif
--
--typedef enum {
-- PTL_ACK_REQ,
-- PTL_NOACK_REQ
--} ptl_ack_req_t;
-
- typedef struct {
- volatile ptl_seq_t sequence;
- ptl_size_t size;
- ptl_event_t *base;
- ptl_handle_any_t cb_eq_handle;
- } ptl_eq_t;
--
- typedef struct {
- ptl_eq_t *eq;
- } ptl_ni_t;
-typedef void (*ptl_eq_handler_t)(ptl_event_t *event);
-#define PTL_EQ_HANDLER_NONE NULL
--
--typedef struct {
- int max_match_entries; /* max number of match entries */
- int max_mem_descriptors; /* max number of memory descriptors */
- int max_event_queues; /* max number of event queues */
- int max_atable_index; /* maximum access control list table index */
- int max_ptable_index; /* maximum portals table index */
- int max_mes;
- int max_mds;
- int max_eqs;
- int max_ac_index;
- int max_pt_index;
- int max_md_iovecs;
- int max_me_list;
- int max_getput_md;
--} ptl_ni_limits_t;
--
--/*
-- * Status registers
-- */
--typedef enum {
-- PTL_SR_DROP_COUNT,
-- PTL_SR_DROP_LENGTH,
-- PTL_SR_RECV_COUNT,
-- PTL_SR_RECV_LENGTH,
-- PTL_SR_SEND_COUNT,
-- PTL_SR_SEND_LENGTH,
-- PTL_SR_MSGS_MAX,
--} ptl_sr_index_t;
--
--typedef int ptl_sr_value_t;
-
-typedef int ptl_interface_t;
-#define PTL_IFACE_DEFAULT (-1)
--
--#endif
+++ /dev/null
--Makefile
--autoMakefile
--autoMakefile.in
--.*.cmd
--.depend
+++ /dev/null
--@BUILD_GMNAL_TRUE@subdir-m += gmnal
- @BUILD_IBNAL_TRUE@subdir-m += ibnal
-@BUILD_OPENIBNAL_TRUE@subdir-m += openibnal
--@BUILD_QSWNAL_TRUE@subdir-m += qswnal
--subdir-m += socknal
--
--@INCLUDE_RULES@
+++ /dev/null
--include $(obj)/../Kernelenv
--
--obj-y = socknal/
--# more coming...
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
- SUBDIRS = gmnal ibnal qswnal socknal
-SUBDIRS = gmnal openibnal qswnal socknal
+++ /dev/null
--.deps
--Makefile
--autoMakefile.in
--autoMakefile
--*.ko
--*.mod.c
--.*.cmd
--.*.flags
--.tmp_versions
--.depend
+++ /dev/null
--MODULES := kgmnal
--kgmnal-objs := gmnal_api.o gmnal_cb.o gmnal_comm.o gmnal_utils.o gmnal_module.o
--
--EXTRA_PRE_CFLAGS := @GMCPPFLAGS@
--
--@INCLUDE_RULES@
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--include ../../Kernelenv
--
--obj-y += gmnal.o
--gmnal-objs := gmnal_api.o gmnal_cb.o gmnal_utils.o gmnal_comm.o gmnal_module.o
--
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--if MODULES
--if BUILD_GMNAL
--if !CRAY_PORTALS
--modulenet_DATA = kgmnal$(KMODEXT)
--endif
--endif
--endif
--
--MOSTLYCLEANFILES = *.o *.ko *.mod.c
--DIST_SOURCES = $(kgmnal-objs:%.o=%.c) gmnal.h
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--
--/*
-- * Portals GM kernel NAL header file
-- * This file makes all declaration and prototypes
-- * for the API side and CB side of the NAL
-- */
--#ifndef __INCLUDE_GMNAL_H__
--#define __INCLUDE_GMNAL_H__
--
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include "linux/config.h"
--#include "linux/module.h"
--#include "linux/tty.h"
--#include "linux/kernel.h"
--#include "linux/mm.h"
--#include "linux/string.h"
--#include "linux/stat.h"
--#include "linux/errno.h"
--#include "linux/locks.h"
--#include "linux/unistd.h"
--#include "linux/init.h"
--#include "linux/sem.h"
--#include "linux/vmalloc.h"
--#include "linux/sysctl.h"
--
--#define DEBUG_SUBSYSTEM S_GMNAL
--
--#include "portals/nal.h"
--#include "portals/api.h"
--#include "portals/errno.h"
--#include "linux/kp30.h"
--#include "portals/p30.h"
--
- #include "portals/lib-nal.h"
-#include "portals/nal.h"
--#include "portals/lib-p30.h"
--
--#define GM_STRONG_TYPES 1
-#ifdef VERSION
-#undef VERSION
-#endif
--#include "gm.h"
--#include "gm_internal.h"
--
--
--
--/*
-- * Defines for the API NAL
-- */
--
--/*
-- * Small message size is configurable
-- * insmod can set small_msg_size
-- * which is used to populate nal_data.small_msg_size
-- */
--#define GMNAL_SMALL_MESSAGE 1078
--#define GMNAL_LARGE_MESSAGE_INIT 1079
--#define GMNAL_LARGE_MESSAGE_ACK 1080
--#define GMNAL_LARGE_MESSAGE_FINI 1081
--
--extern int gmnal_small_msg_size;
--extern int num_rx_threads;
--extern int num_stxds;
--extern int gm_port;
--#define GMNAL_SMALL_MSG_SIZE(a) a->small_msg_size
--#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c) gmnal_is_small_msg(n, a, b, c)
--#define GMNAL_MAGIC 0x1234abcd
--/*
-- * The gm_port to use for gmnal
-- */
--#define GMNAL_GM_PORT gm_port
--
--
--/*
-- * Small Transmit Descriptor
-- * A structre to keep track of a small transmit operation
-- * This structure has a one-to-one relationship with a small
-- * transmit buffer (both create by gmnal_stxd_alloc).
-- * There are two free list of stxd. One for use by clients of the NAL
-- * and the other by the NAL rxthreads when doing sends.
-- * This helps prevent deadlock caused by stxd starvation.
-- */
--typedef struct _gmnal_stxd_t {
-- void *buffer;
-- int buffer_size;
-- gm_size_t gm_size;
-- int msg_size;
-- int gm_target_node;
-- int gm_priority;
-- int type;
-- struct _gmnal_data_t *nal_data;
-- lib_msg_t *cookie;
-- int niov;
-- struct iovec iov[PTL_MD_MAX_IOV];
-- struct _gmnal_stxd_t *next;
-- int rxt;
-- int kniov;
-- struct iovec *iovec_dup;
--} gmnal_stxd_t;
--
--/*
-- * keeps a transmit token for large transmit (gm_get)
-- * and a pointer to rxd that is used as context for large receive
-- */
--typedef struct _gmnal_ltxd_t {
-- struct _gmnal_ltxd_t *next;
-- struct _gmnal_srxd_t *srxd;
--} gmnal_ltxd_t;
--
--
--/*
-- * as for gmnal_stxd_t
-- * a hash table in nal_data find srxds from
-- * the rx buffer address. hash table populated at init time
-- */
--typedef struct _gmnal_srxd_t {
-- void *buffer;
-- int size;
-- gm_size_t gmsize;
-- unsigned int gm_source_node;
-- gmnal_stxd_t *source_stxd;
-- int type;
-- int nsiov;
-- int nriov;
-- struct iovec *riov;
-- int ncallbacks;
-- spinlock_t callback_lock;
-- int callback_status;
-- lib_msg_t *cookie;
-- struct _gmnal_srxd_t *next;
-- struct _gmnal_data_t *nal_data;
--} gmnal_srxd_t;
--
--/*
-- * Header which lmgnal puts at the start of each message
-- */
--typedef struct _gmnal_msghdr {
-- int magic;
-- int type;
-- unsigned int sender_node_id;
-- gmnal_stxd_t *stxd;
-- int niov;
-- } gmnal_msghdr_t;
--#define GMNAL_MSGHDR_SIZE sizeof(gmnal_msghdr_t)
--
--/*
-- * the caretaker thread (ct_thread) gets receive events
-- * (and other events) from the myrinet device via the GM2 API.
-- * caretaker thread populates one work entry for each receive event,
-- * puts it on a Q in nal_data and wakes a receive thread to
-- * process the receive.
-- * Processing a portals receive can involve a transmit operation.
-- * Because of this the caretaker thread cannot process receives
-- * as it may get deadlocked when supply of transmit descriptors
-- * is exhausted (as caretaker thread is responsible for replacing
-- * transmit descriptors on the free list)
-- */
--typedef struct _gmnal_rxtwe {
-- void *buffer;
-- unsigned snode;
-- unsigned sport;
-- unsigned type;
-- unsigned length;
-- struct _gmnal_rxtwe *next;
--} gmnal_rxtwe_t;
--
--/*
-- * 1 receive thread started on each CPU
-- */
--#define NRXTHREADS 10 /* max number of receiver threads */
--
--typedef struct _gmnal_data_t {
- int refcnt;
- spinlock_t cb_lock;
-- spinlock_t stxd_lock;
-- struct semaphore stxd_token;
-- gmnal_stxd_t *stxd;
-- spinlock_t rxt_stxd_lock;
-- struct semaphore rxt_stxd_token;
-- gmnal_stxd_t *rxt_stxd;
-- spinlock_t ltxd_lock;
-- struct semaphore ltxd_token;
-- gmnal_ltxd_t *ltxd;
-- spinlock_t srxd_lock;
-- struct semaphore srxd_token;
-- gmnal_srxd_t *srxd;
-- struct gm_hash *srxd_hash;
-- nal_t *nal;
- nal_cb_t *nal_cb;
- lib_nal_t *libnal;
-- struct gm_port *gm_port;
-- unsigned int gm_local_nid;
-- unsigned int gm_global_nid;
-- spinlock_t gm_lock;
-- long rxthread_pid[NRXTHREADS];
-- int rxthread_stop_flag;
-- spinlock_t rxthread_flag_lock;
-- long rxthread_flag;
-- long ctthread_pid;
-- int ctthread_flag;
-- gm_alarm_t ctthread_alarm;
-- int small_msg_size;
-- int small_msg_gmsize;
-- gmnal_rxtwe_t *rxtwe_head;
-- gmnal_rxtwe_t *rxtwe_tail;
-- spinlock_t rxtwe_lock;
-- struct semaphore rxtwe_wait;
-- struct ctl_table_header *sysctl;
--} gmnal_data_t;
--
--/*
-- * Flags to start/stop and check status of threads
-- * each rxthread sets 1 bit (any bit) of the flag on startup
-- * and clears 1 bit when exiting
-- */
--#define GMNAL_THREAD_RESET 0
--#define GMNAL_THREAD_STOP 666
--#define GMNAL_CTTHREAD_STARTED 333
--#define GMNAL_RXTHREADS_STARTED ( (1<<num_rx_threads)-1)
--
--
--extern gmnal_data_t *global_nal_data;
--
--/*
-- * for ioctl get pid
-- */
--#define GMNAL_IOC_GET_GNID 1
--
--/*
-- * Return codes
-- */
--#define GMNAL_STATUS_OK 0
--#define GMNAL_STATUS_FAIL 1
--#define GMNAL_STATUS_NOMEM 2
--
--
--/*
-- * FUNCTION PROTOTYPES
-- */
--
--/*
-- * Locking macros
-- */
--
--/*
-- * For the Small tx and rx descriptor lists
-- */
--#define GMNAL_TXD_LOCK_INIT(a) spin_lock_init(&a->stxd_lock);
--#define GMNAL_TXD_LOCK(a) spin_lock(&a->stxd_lock);
--#define GMNAL_TXD_UNLOCK(a) spin_unlock(&a->stxd_lock);
--#define GMNAL_TXD_TOKEN_INIT(a, n) sema_init(&a->stxd_token, n);
--#define GMNAL_TXD_GETTOKEN(a) down(&a->stxd_token);
--#define GMNAL_TXD_TRYGETTOKEN(a) down_trylock(&a->stxd_token)
--#define GMNAL_TXD_RETURNTOKEN(a) up(&a->stxd_token);
--
--#define GMNAL_RXT_TXD_LOCK_INIT(a) spin_lock_init(&a->rxt_stxd_lock);
--#define GMNAL_RXT_TXD_LOCK(a) spin_lock(&a->rxt_stxd_lock);
--#define GMNAL_RXT_TXD_UNLOCK(a) spin_unlock(&a->rxt_stxd_lock);
--#define GMNAL_RXT_TXD_TOKEN_INIT(a, n) sema_init(&a->rxt_stxd_token, n);
--#define GMNAL_RXT_TXD_GETTOKEN(a) down(&a->rxt_stxd_token);
--#define GMNAL_RXT_TXD_TRYGETTOKEN(a) down_trylock(&a->rxt_stxd_token)
--#define GMNAL_RXT_TXD_RETURNTOKEN(a) up(&a->rxt_stxd_token);
--
--#define GMNAL_LTXD_LOCK_INIT(a) spin_lock_init(&a->ltxd_lock);
--#define GMNAL_LTXD_LOCK(a) spin_lock(&a->ltxd_lock);
--#define GMNAL_LTXD_UNLOCK(a) spin_unlock(&a->ltxd_lock);
--#define GMNAL_LTXD_TOKEN_INIT(a, n) sema_init(&a->ltxd_token, n);
--#define GMNAL_LTXD_GETTOKEN(a) down(&a->ltxd_token);
--#define GMNAL_LTXD_TRYGETTOKEN(a) down_trylock(&a->ltxd_token)
--#define GMNAL_LTXD_RETURNTOKEN(a) up(&a->ltxd_token);
--
--#define GMNAL_RXD_LOCK_INIT(a) spin_lock_init(&a->srxd_lock);
--#define GMNAL_RXD_LOCK(a) spin_lock(&a->srxd_lock);
--#define GMNAL_RXD_UNLOCK(a) spin_unlock(&a->srxd_lock);
--#define GMNAL_RXD_TOKEN_INIT(a, n) sema_init(&a->srxd_token, n);
--#define GMNAL_RXD_GETTOKEN(a) down(&a->srxd_token);
--#define GMNAL_RXD_TRYGETTOKEN(a) down_trylock(&a->srxd_token)
--#define GMNAL_RXD_RETURNTOKEN(a) up(&a->srxd_token);
--
--#define GMNAL_GM_LOCK_INIT(a) spin_lock_init(&a->gm_lock);
--#define GMNAL_GM_LOCK(a) spin_lock(&a->gm_lock);
--#define GMNAL_GM_UNLOCK(a) spin_unlock(&a->gm_lock);
- #define GMNAL_CB_LOCK_INIT(a) spin_lock_init(&a->cb_lock);
--
--
--/*
-- * Memory Allocator
-- */
--
--/*
-- * API NAL
-- */
-int gmnal_api_startup(nal_t *, ptl_pid_t,
- ptl_ni_limits_t *, ptl_ni_limits_t *);
-
--int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t);
--
- int gmnal_api_shutdown(nal_t *, int);
-void gmnal_api_shutdown(nal_t *);
--
--int gmnal_api_validate(nal_t *, void *, size_t);
--
- void gmnal_api_yield(nal_t *);
-void gmnal_api_yield(nal_t *, unsigned long *, int);
--
--void gmnal_api_lock(nal_t *, unsigned long *);
--
--void gmnal_api_unlock(nal_t *, unsigned long *);
--
--
--#define GMNAL_INIT_NAL(a) do { \
- a->forward = gmnal_api_forward; \
- a->shutdown = gmnal_api_shutdown; \
- a->validate = NULL; \
- a->yield = gmnal_api_yield; \
- a->lock = gmnal_api_lock; \
- a->unlock = gmnal_api_unlock; \
- a->timeout = NULL; \
- a->refct = 1; \
- a->nal_data = NULL; \
- (a)->nal_ni_init = gmnal_api_startup; \
- (a)->nal_ni_fini = gmnal_api_shutdown; \
- (a)->nal_data = NULL; \
-- } while (0)
--
--
--/*
-- * CB NAL
-- */
-
- int gmnal_cb_send(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
- int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
-
- int gmnal_cb_send_pages(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
- int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
-
- int gmnal_cb_recv(nal_cb_t *, void *, lib_msg_t *,
- unsigned int, struct iovec *, size_t, size_t);
-
- int gmnal_cb_recv_pages(nal_cb_t *, void *, lib_msg_t *,
- unsigned int, ptl_kiov_t *, size_t, size_t);
-
- int gmnal_cb_read(nal_cb_t *, void *private, void *, user_ptr, size_t);
-
- int gmnal_cb_write(nal_cb_t *, void *private, user_ptr, void *, size_t);
-
- int gmnal_cb_callback(nal_cb_t *, void *, lib_eq_t *, ptl_event_t *);
-
- void *gmnal_cb_malloc(nal_cb_t *, size_t);
-
- void gmnal_cb_free(nal_cb_t *, void *, size_t);
-
- void gmnal_cb_unmap(nal_cb_t *, unsigned int, struct iovec*, void **);
--
- int gmnal_cb_map(nal_cb_t *, unsigned int, struct iovec*, void **);
-ptl_err_t gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
- int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t, size_t);
--
- void gmnal_cb_printf(nal_cb_t *, const char *fmt, ...);
-ptl_err_t gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
- int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t, size_t);
--
- void gmnal_cb_cli(nal_cb_t *, unsigned long *);
-ptl_err_t gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *,
- unsigned int, struct iovec *, size_t, size_t, size_t);
--
- void gmnal_cb_sti(nal_cb_t *, unsigned long *);
-ptl_err_t gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *,
- unsigned int, ptl_kiov_t *, size_t, size_t, size_t);
--
- int gmnal_cb_dist(nal_cb_t *, ptl_nid_t, unsigned long *);
-int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
--
- nal_t *gmnal_init(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t rpid);
-int gmnal_init(void);
--
--void gmnal_fini(void);
--
--
--
--#define GMNAL_INIT_NAL_CB(a) do { \
- a->cb_send = gmnal_cb_send; \
- a->cb_send_pages = gmnal_cb_send_pages; \
- a->cb_recv = gmnal_cb_recv; \
- a->cb_recv_pages = gmnal_cb_recv_pages; \
- a->cb_read = gmnal_cb_read; \
- a->cb_write = gmnal_cb_write; \
- a->cb_callback = gmnal_cb_callback; \
- a->cb_malloc = gmnal_cb_malloc; \
- a->cb_free = gmnal_cb_free; \
- a->cb_map = NULL; \
- a->cb_unmap = NULL; \
- a->cb_printf = gmnal_cb_printf; \
- a->cb_cli = gmnal_cb_cli; \
- a->cb_sti = gmnal_cb_sti; \
- a->cb_dist = gmnal_cb_dist; \
- a->nal_data = NULL; \
- a->libnal_send = gmnal_cb_send; \
- a->libnal_send_pages = gmnal_cb_send_pages; \
- a->libnal_recv = gmnal_cb_recv; \
- a->libnal_recv_pages = gmnal_cb_recv_pages; \
- a->libnal_map = NULL; \
- a->libnal_unmap = NULL; \
- a->libnal_dist = gmnal_cb_dist; \
- a->libnal_data = NULL; \
-- } while (0)
--
--
--/*
-- * Small and Large Transmit and Receive Descriptor Functions
-- */
--int gmnal_alloc_txd(gmnal_data_t *);
--void gmnal_free_txd(gmnal_data_t *);
--gmnal_stxd_t* gmnal_get_stxd(gmnal_data_t *, int);
--void gmnal_return_stxd(gmnal_data_t *, gmnal_stxd_t *);
--gmnal_ltxd_t* gmnal_get_ltxd(gmnal_data_t *);
--void gmnal_return_ltxd(gmnal_data_t *, gmnal_ltxd_t *);
--
--int gmnal_alloc_srxd(gmnal_data_t *);
--void gmnal_free_srxd(gmnal_data_t *);
--gmnal_srxd_t* gmnal_get_srxd(gmnal_data_t *, int);
--void gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *);
--
--/*
-- * general utility functions
-- */
--gmnal_srxd_t *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*);
--void gmnal_stop_rxthread(gmnal_data_t *);
--void gmnal_stop_ctthread(gmnal_data_t *);
--void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
--void gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
--void gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
--char *gmnal_gm_error(gm_status_t);
--char *gmnal_rxevent(gm_recv_event_t*);
--int gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
--void gmnal_yield(int);
--int gmnal_start_kernel_threads(gmnal_data_t *);
--
--
--/*
-- * Communication functions
-- */
--
--/*
-- * Receive threads
-- */
--int gmnal_ct_thread(void *); /* caretaker thread */
--int gmnal_rx_thread(void *); /* receive thread */
--int gmnal_pre_receive(gmnal_data_t*, gmnal_rxtwe_t*, int);
--int gmnal_rx_bad(gmnal_data_t *, gmnal_rxtwe_t *, gmnal_srxd_t*);
--int gmnal_rx_requeue_buffer(gmnal_data_t *, gmnal_srxd_t *);
--int gmnal_add_rxtwe(gmnal_data_t *, gm_recv_t *);
--gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_data_t *);
--void gmnal_remove_rxtwe(gmnal_data_t *);
--
--
--/*
-- * Small messages
-- */
- int gmnal_small_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t);
- int gmnal_small_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
-int gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t, size_t);
-int gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
-- int, ptl_nid_t, ptl_pid_t,
- unsigned int, struct iovec*, int);
- unsigned int, struct iovec*, size_t, int);
--void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
--
--
--
--/*
-- * Large messages
-- */
- int gmnal_large_rx(nal_cb_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t);
-int gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t, size_t);
--
- int gmnal_large_tx(nal_cb_t *, void *, lib_msg_t *, ptl_hdr_t *,
-int gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
-- int, ptl_nid_t, ptl_pid_t, unsigned int,
- struct iovec*, int);
- struct iovec*, size_t, int);
--
--void gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t);
--
--int gmnal_remote_get(gmnal_srxd_t *, int, struct iovec*, int,
-- struct iovec*);
--
--void gmnal_remote_get_callback(gm_port_t *, void *, gm_status_t);
--
--int gmnal_copyiov(int, gmnal_srxd_t *, int, struct iovec*, int,
-- struct iovec*);
--
--void gmnal_large_tx_ack(gmnal_data_t *, gmnal_srxd_t *);
--void gmnal_large_tx_ack_callback(gm_port_t *, void *, gm_status_t);
--void gmnal_large_tx_ack_received(gmnal_data_t *, gmnal_srxd_t *);
--
--#endif /*__INCLUDE_GMNAL_H__*/
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/*
-- * Implements the API NAL functions
-- */
--
--#include "gmnal.h"
--
--
--
--gmnal_data_t *global_nal_data = NULL;
--#define GLOBAL_NID_STR_LEN 16
--char global_nid_str[GLOBAL_NID_STR_LEN] = {0};
-ptl_handle_ni_t kgmnal_ni;
-
-extern int gmnal_cmd(struct portals_cfg *pcfg, void *private);
--
--/*
-- * Write the global nid /proc/sys/gmnal/globalnid
-- */
--#define GMNAL_SYSCTL 201
--#define GMNAL_SYSCTL_GLOBALNID 1
--
--static ctl_table gmnal_sysctl_table[] = {
-- {GMNAL_SYSCTL_GLOBALNID, "globalnid",
-- global_nid_str, GLOBAL_NID_STR_LEN,
-- 0444, NULL, &proc_dostring},
-- { 0 }
--};
--
--
--static ctl_table gmnalnal_top_sysctl_table[] = {
-- {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
-- { 0 }
--};
-
-
-
-
-
-
- /*
- * gmnal_api_forward
- * This function takes a pack block of arguments from the NAL API
- * module and passes them to the NAL CB module. The CB module unpacks
- * the args and calls the appropriate function indicated by index.
- * Typically this function is used to pass args between kernel and use
- * space.
- * As lgmanl exists entirely in kernel, just pass the arg block directly
- * to the NAL CB, buy passing the args to lib_dispatch
- * Arguments are
- * nal_t nal Our nal
- * int index the api function that initiated this call
- * void *args packed block of function args
- * size_t arg_len length of args block
- * void *ret A return value for the API NAL
- * size_t ret_len Size of the return value
- *
- */
-
- int
- gmnal_api_forward(nal_t *nal, int index, void *args, size_t arg_len,
- void *ret, size_t ret_len)
- {
-
- nal_cb_t *nal_cb = NULL;
- gmnal_data_t *nal_data = NULL;
-
-
-
-
-
- if (!nal || !args || (index < 0) || (arg_len < 0)) {
- CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
- return (PTL_FAIL);
- }
-
- if (ret && (ret_len <= 0)) {
- CDEBUG(D_ERROR, "Bad args to gmnal_api_forward\n");
- return (PTL_FAIL);
- }
-
-
- if (!nal->nal_data) {
- CDEBUG(D_ERROR, "bad nal, no nal data\n");
- return (PTL_FAIL);
- }
-
- nal_data = nal->nal_data;
- CDEBUG(D_INFO, "nal_data is [%p]\n", nal_data);
-
- if (!nal_data->nal_cb) {
- CDEBUG(D_ERROR, "bad nal_data, no nal_cb\n");
- return (PTL_FAIL);
- }
-
- nal_cb = nal_data->nal_cb;
- CDEBUG(D_INFO, "nal_cb is [%p]\n", nal_cb);
-
- CDEBUG(D_PORTALS, "gmnal_api_forward calling lib_dispatch\n");
- lib_dispatch(nal_cb, NULL, index, args, ret);
- CDEBUG(D_PORTALS, "gmnal_api_forward returns from lib_dispatch\n");
-
- return(PTL_OK);
- }
-
--
--/*
-- * gmnal_api_shutdown
- * nal_refct == 0 => called on last matching PtlNIFini()
-- * Close down this interface and free any resources associated with it
-- * nal_t nal our nal to shutdown
- */
- int
- gmnal_api_shutdown(nal_t *nal, int interface)
- {
-
- gmnal_data_t *nal_data = nal->nal_data;
-
- CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
-
- return(PTL_OK);
- }
-
-
- /*
- * gmnal_api_validate
- * validate a user address for use in communications
- * There's nothing to be done here
- */
- int
- gmnal_api_validate(nal_t *nal, void *base, size_t extent)
- {
-
- return(PTL_OK);
- }
-
-
-
- /*
- * gmnal_api_yield
- * Give up the processor
- */
- void
- gmnal_api_yield(nal_t *nal)
- {
- CDEBUG(D_TRACE, "gmnal_api_yield : nal [%p]\n", nal);
-
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
-
- return;
- }
-
-
-
- /*
- * gmnal_api_lock
- * Take a threadsafe lock
-- */
--void
- gmnal_api_lock(nal_t *nal, unsigned long *flags)
-gmnal_api_shutdown(nal_t *nal)
--{
-
-- gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
-
- nal_data = nal->nal_data;
- nal_cb = nal_data->nal_cb;
- lib_nal_t *libnal;
--
- nal_cb->cb_cli(nal_cb, flags);
- if (nal->nal_refct != 0)
- return;
-
--
- return;
- }
- LASSERT(nal == global_nal_data->nal);
- libnal = (lib_nal_t *)nal->nal_data;
- nal_data = (gmnal_data_t *)libnal->libnal_data;
- LASSERT(nal_data == global_nal_data);
- CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
--
- /*
- * gmnal_api_unlock
- * Release a threadsafe lock
- */
- void
- gmnal_api_unlock(nal_t *nal, unsigned long *flags)
- {
- gmnal_data_t *nal_data;
- nal_cb_t *nal_cb;
- /* Stop portals calling our ioctl handler */
- libcfs_nal_cmd_unregister(GMNAL);
--
- nal_data = nal->nal_data;
- nal_cb = nal_data->nal_cb;
- /* XXX for shutdown "under fire" we probably need to set a shutdown
- * flag so when lib calls us we fail immediately and dont queue any
- * more work but our threads can still call into lib OK. THEN
- * shutdown our threads, THEN lib_fini() */
- lib_fini(libnal);
--
- nal_cb->cb_sti(nal_cb, flags);
- gmnal_stop_rxthread(nal_data);
- gmnal_stop_ctthread(nal_data);
- gmnal_free_txd(nal_data);
- gmnal_free_srxd(nal_data);
- GMNAL_GM_LOCK(nal_data);
- gm_close(nal_data->gm_port);
- gm_finalize();
- GMNAL_GM_UNLOCK(nal_data);
- if (nal_data->sysctl)
- unregister_sysctl_table (nal_data->sysctl);
- /* Don't free 'nal'; it's a static struct */
- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
--
- return;
- global_nal_data = NULL;
- PORTAL_MODULE_UNUSE;
--}
--
--
- nal_t *
- gmnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
- ptl_pid_t rpid)
-int
-gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
--
- nal_t *nal = NULL;
- nal_cb_t *nal_cb = NULL;
- lib_nal_t *libnal = NULL;
-- gmnal_data_t *nal_data = NULL;
-- gmnal_srxd_t *srxd = NULL;
-- gm_status_t gm_status;
-- unsigned int local_nid = 0, global_nid = 0;
- ptl_nid_t portals_nid;
- ptl_pid_t portals_pid = 0;
- ptl_process_id_t process_id;
--
- if (nal->nal_refct != 0) {
- if (actual_limits != NULL) {
- libnal = (lib_nal_t *)nal->nal_data;
- *actual_limits = libnal->libnal_ni.ni_actual_limits;
- }
- return (PTL_OK);
- }
--
- CDEBUG(D_TRACE, "gmnal_init : interface [%d], ptl_size [%d], "
- "ac_size[%d]\n", interface, ptl_size, ac_size);
- /* Called on first PtlNIInit() */
-
- CDEBUG(D_TRACE, "startup\n");
--
- LASSERT(global_nal_data == NULL);
--
-- PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
-- if (!nal_data) {
-- CDEBUG(D_ERROR, "can't get memory\n");
- return(NULL);
- return(PTL_NO_SPACE);
-- }
-- memset(nal_data, 0, sizeof(gmnal_data_t));
-- /*
-- * set the small message buffer size
-- */
- nal_data->refcnt = 1;
--
-- CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
-- CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
-
- PORTAL_ALLOC(nal, sizeof(nal_t));
- if (!nal) {
- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- return(NULL);
- }
- memset(nal, 0, sizeof(nal_t));
- CDEBUG(D_INFO, "Allocd and reset nal[%p]\n", nal);
--
- PORTAL_ALLOC(nal_cb, sizeof(nal_cb_t));
- if (!nal_cb) {
- PORTAL_FREE(nal, sizeof(nal_t));
- PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
- if (!libnal) {
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- return(NULL);
- return(PTL_NO_SPACE);
-- }
- memset(nal_cb, 0, sizeof(nal_cb_t));
- CDEBUG(D_INFO, "Allocd and reset nal_cb[%p]\n", nal_cb);
- memset(libnal, 0, sizeof(lib_nal_t));
- CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
--
- GMNAL_INIT_NAL(nal);
- GMNAL_INIT_NAL_CB(nal_cb);
- GMNAL_INIT_NAL_CB(libnal);
-- /*
-- * String them all together
-- */
- nal->nal_data = (void*)nal_data;
- nal_cb->nal_data = (void*)nal_data;
- libnal->libnal_data = (void*)nal_data;
-- nal_data->nal = nal;
- nal_data->nal_cb = nal_cb;
- nal_data->libnal = libnal;
--
- GMNAL_CB_LOCK_INIT(nal_data);
-- GMNAL_GM_LOCK_INIT(nal_data);
--
--
-- /*
-- * initialise the interface,
-- */
-- CDEBUG(D_INFO, "Calling gm_init\n");
-- if (gm_init() != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "call to gm_init failed\n");
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
-- }
--
--
- CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
- "name [%s], version [%d]\n", interface, GMNAL_GM_PORT,
- CDEBUG(D_NET, "Calling gm_open with port [%d], "
- "name [%s], version [%d]\n", GMNAL_GM_PORT,
-- "gmnal", GM_API_VERSION);
--
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal",
-- GM_API_VERSION);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status);
-- if (gm_status == GM_SUCCESS) {
-- CDEBUG(D_INFO, "gm_open succeeded port[%p]\n",
-- nal_data->gm_port);
-- } else {
-- switch(gm_status) {
-- case(GM_INVALID_PARAMETER):
-- CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n");
-- break;
-- case(GM_BUSY):
-- CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n");
-- break;
-- case(GM_NO_SUCH_DEVICE):
-- CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
-- break;
-- case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-- CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
-- "and driver\n");
-- break;
-- case(GM_OUT_OF_MEMORY):
-- CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
-- break;
-- default:
-- CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
-- "code [%d]\n", gm_status);
-- break;
-- }
-- GMNAL_GM_LOCK(nal_data);
-- gm_finalize();
-- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
-- }
--
--
-- nal_data->small_msg_size = gmnal_small_msg_size;
-- nal_data->small_msg_gmsize =
-- gm_min_size_for_length(gmnal_small_msg_size);
--
-- if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) {
-- CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n");
-- gmnal_free_txd(nal_data);
-- GMNAL_GM_LOCK(nal_data);
-- gm_close(nal_data->gm_port);
-- gm_finalize();
-- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
-- }
--
--
-- /*
-- * Hang out a bunch of small receive buffers
-- * In fact hang them all out
-- */
-- while((srxd = gmnal_get_srxd(nal_data, 0))) {
-- CDEBUG(D_NET, "giving [%p] to gm_provide_recvive_buffer\n",
-- srxd->buffer);
-- GMNAL_GM_LOCK(nal_data);
-- gm_provide_receive_buffer_with_tag(nal_data->gm_port,
-- srxd->buffer, srxd->gmsize,
-- GM_LOW_PRIORITY, 0);
-- GMNAL_GM_UNLOCK(nal_data);
-- }
--
-- /*
-- * Allocate pools of small tx buffers and descriptors
-- */
-- if (gmnal_alloc_txd(nal_data) != GMNAL_STATUS_OK) {
-- CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n");
-- GMNAL_GM_LOCK(nal_data);
-- gm_close(nal_data->gm_port);
-- gm_finalize();
-- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
-- }
--
-- gmnal_start_kernel_threads(nal_data);
--
-- while (nal_data->rxthread_flag != GMNAL_RXTHREADS_STARTED) {
-- gmnal_yield(1);
-- CDEBUG(D_INFO, "Waiting for receive thread signs of life\n");
-- }
--
-- CDEBUG(D_INFO, "receive thread seems to have started\n");
--
--
-- /*
-- * Initialise the portals library
-- */
-- CDEBUG(D_NET, "Getting node id\n");
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_get_node_id(nal_data->gm_port, &local_nid);
-- GMNAL_GM_UNLOCK(nal_data);
-- if (gm_status != GM_SUCCESS) {
-- gmnal_stop_rxthread(nal_data);
-- gmnal_stop_ctthread(nal_data);
-- CDEBUG(D_ERROR, "can't determine node id\n");
-- gmnal_free_txd(nal_data);
-- gmnal_free_srxd(nal_data);
-- GMNAL_GM_LOCK(nal_data);
-- gm_close(nal_data->gm_port);
-- gm_finalize();
-- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
-- }
-
-- nal_data->gm_local_nid = local_nid;
-- CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid);
-
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid,
-- &global_nid);
-- GMNAL_GM_UNLOCK(nal_data);
-- if (gm_status != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "failed to obtain global id\n");
-- gmnal_stop_rxthread(nal_data);
-- gmnal_stop_ctthread(nal_data);
-- gmnal_free_txd(nal_data);
-- gmnal_free_srxd(nal_data);
-- GMNAL_GM_LOCK(nal_data);
-- gm_close(nal_data->gm_port);
-- gm_finalize();
-- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
-- }
-- CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
-- nal_data->gm_global_nid = global_nid;
-- snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
--
--/*
-- pid = gm_getpid();
--*/
- CDEBUG(D_INFO, "portals_pid is [%u]\n", portals_pid);
- portals_nid = (unsigned long)global_nid;
- CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", portals_nid);
- process_id.pid = requested_pid;
- process_id.nid = global_nid;
-
- CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid);
- CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
--
-- CDEBUG(D_PORTALS, "calling lib_init\n");
- if (lib_init(nal_cb, portals_nid, portals_pid, 1024, ptl_size,
- ac_size) != PTL_OK) {
- if (lib_init(libnal, nal, process_id,
- requested_limits, actual_limits) != PTL_OK) {
-- CDEBUG(D_ERROR, "lib_init failed\n");
-- gmnal_stop_rxthread(nal_data);
-- gmnal_stop_ctthread(nal_data);
-- gmnal_free_txd(nal_data);
-- gmnal_free_srxd(nal_data);
-- GMNAL_GM_LOCK(nal_data);
-- gm_close(nal_data->gm_port);
-- gm_finalize();
-- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal, sizeof(nal_t));
-- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- return(NULL);
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
--
-- }
-
- if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) {
- CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n");
-
- /* XXX these cleanup cases should be restructured to
- * minimise duplication... */
- lib_fini(libnal);
-
- gmnal_stop_rxthread(nal_data);
- gmnal_stop_ctthread(nal_data);
- gmnal_free_txd(nal_data);
- gmnal_free_srxd(nal_data);
- GMNAL_GM_LOCK(nal_data);
- gm_close(nal_data->gm_port);
- gm_finalize();
- GMNAL_GM_UNLOCK(nal_data);
- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(libnal, sizeof(lib_nal_t));
- return(PTL_FAIL);
- }
-
- /* might be better to initialise this at module load rather than in
- * NAL startup */
-- nal_data->sysctl = NULL;
-- nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
--
--
-- CDEBUG(D_INFO, "gmnal_init finished\n");
-- global_nal_data = nal->nal_data;
- return(nal);
-
- /* no unload now until shutdown */
- PORTAL_MODULE_USE;
-
- return(PTL_OK);
--}
-
-nal_t the_gm_nal;
-
-/*
- * Called when module loaded
- */
-int gmnal_init(void)
-{
- int rc;
-
- memset(&the_gm_nal, 0, sizeof(nal_t));
- CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal);
- GMNAL_INIT_NAL(&the_gm_nal);
-
- rc = ptl_register_nal(GMNAL, &the_gm_nal);
- if (rc != PTL_OK)
- CERROR("Can't register GMNAL: %d\n", rc);
- rc = PtlNIInit(GMNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kgmnal_ni);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
- ptl_unregister_nal(GMNAL);
- return (-ENODEV);
- }
--
- return (rc);
-}
--
-
--
--/*
-- * Called when module removed
-- */
--void gmnal_fini()
--{
- gmnal_data_t *nal_data = global_nal_data;
- nal_t *nal = nal_data->nal;
- nal_cb_t *nal_cb = nal_data->nal_cb;
-
-- CDEBUG(D_TRACE, "gmnal_fini\n");
--
- PtlNIFini(kgmnal_ni);
- lib_fini(nal_cb);
- LASSERT(global_nal_data == NULL);
- PtlNIFini(kgmnal_ni);
--
- gmnal_stop_rxthread(nal_data);
- gmnal_stop_ctthread(nal_data);
- gmnal_free_txd(nal_data);
- gmnal_free_srxd(nal_data);
- GMNAL_GM_LOCK(nal_data);
- gm_close(nal_data->gm_port);
- gm_finalize();
- GMNAL_GM_UNLOCK(nal_data);
- if (nal_data->sysctl)
- unregister_sysctl_table (nal_data->sysctl);
- PORTAL_FREE(nal, sizeof(nal_t));
- PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
- PORTAL_FREE(nal_cb, sizeof(nal_cb_t));
- ptl_unregister_nal(GMNAL);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--
--/*
-- * This file implements the nal cb functions
-- */
--
--
--#include "gmnal.h"
--
- int gmnal_cb_recv(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t mlen,
- size_t rlen)
-ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t offset,
- size_t mlen, size_t rlen)
--{
-- gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
-- int status = PTL_OK;
--
--
- CDEBUG(D_TRACE, "gmnal_cb_recv nal_cb [%p], private[%p], cookie[%p], "
- "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, niov, iov, mlen, rlen);
- CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
- "niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n",
- libnal, private, cookie, niov, iov, offset, mlen, rlen);
--
-- switch(srxd->type) {
-- case(GMNAL_SMALL_MESSAGE):
-- CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
- status = gmnal_small_rx(nal_cb, private, cookie, niov,
- iov, mlen, rlen);
- status = gmnal_small_rx(libnal, private, cookie, niov,
- iov, offset, mlen, rlen);
-- break;
-- case(GMNAL_LARGE_MESSAGE_INIT):
-- CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
- status = gmnal_large_rx(nal_cb, private, cookie, niov,
- iov, mlen, rlen);
- status = gmnal_large_rx(libnal, private, cookie, niov,
- iov, offset, mlen, rlen);
-- }
--
--
-- CDEBUG(D_INFO, "gmnal_cb_recv gmnal_return status [%d]\n", status);
-- return(status);
--}
--
- int gmnal_cb_recv_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- unsigned int kniov, ptl_kiov_t *kiov, size_t mlen,
- size_t rlen)
-ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int kniov, ptl_kiov_t *kiov, size_t offset,
- size_t mlen, size_t rlen)
--{
-- gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
-- int status = PTL_OK;
-- struct iovec *iovec = NULL, *iovec_dup = NULL;
-- int i = 0;
-- ptl_kiov_t *kiov_dup = kiov;;
--
--
- CDEBUG(D_TRACE, "gmnal_cb_recv_pages nal_cb [%p],private[%p], "
- "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, kniov, kiov, mlen, rlen);
- CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
- "cookie[%p], kniov[%d], kiov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n",
- libnal, private, cookie, kniov, kiov, offset, mlen, rlen);
--
-- if (srxd->type == GMNAL_SMALL_MESSAGE) {
-- PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
-- if (!iovec) {
-- CDEBUG(D_ERROR, "Can't malloc\n");
-- return(GMNAL_STATUS_FAIL);
-- }
-- iovec_dup = iovec;
--
-- /*
-- * map each page and create an iovec for it
-- */
-- for (i=0; i<kniov; i++) {
-- CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
-- CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-- kiov->kiov_page, kiov->kiov_len,
-- kiov->kiov_offset);
-- iovec->iov_len = kiov->kiov_len;
-- CDEBUG(D_INFO, "Calling kmap[%p]", kiov->kiov_page);
--
-- iovec->iov_base = kmap(kiov->kiov_page) +
-- kiov->kiov_offset;
--
-- CDEBUG(D_INFO, "iov_base is [%p]\n", iovec->iov_base);
-- iovec++;
-- kiov++;
-- }
-- CDEBUG(D_INFO, "calling gmnal_small_rx\n");
- status = gmnal_small_rx(nal_cb, private, cookie, kniov,
- iovec_dup, mlen, rlen);
- status = gmnal_small_rx(libnal, private, cookie, kniov,
- iovec_dup, offset, mlen, rlen);
-- for (i=0; i<kniov; i++) {
-- kunmap(kiov_dup->kiov_page);
-- kiov_dup++;
-- }
-- PORTAL_FREE(iovec_dup, sizeof(struct iovec)*kniov);
-- }
--
--
-- CDEBUG(D_INFO, "gmnal_return status [%d]\n", status);
-- return(status);
--}
--
--
- int gmnal_cb_send(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
-ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
-- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, size_t len)
- unsigned int niov, struct iovec *iov, size_t offset, size_t len)
--{
--
-- gmnal_data_t *nal_data;
--
--
- CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n",
- niov, len, nid);
- nal_data = nal_cb->nal_data;
- CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ"] nid["LPU64"]\n",
- niov, offset, len, nid);
- nal_data = libnal->libnal_data;
--
-- if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
-- CDEBUG(D_INFO, "This is a small message send\n");
- gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid, pid,
- niov, iov, len);
- gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
- niov, iov, offset, len);
-- } else {
-- CDEBUG(D_ERROR, "Large message send it is not supported\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
- lib_finalize(libnal, private, cookie, PTL_FAIL);
-- return(PTL_FAIL);
- gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid, pid,
- niov, iov, len);
- gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid,
- niov, iov, offset, len);
-- }
-- return(PTL_OK);
--}
--
- int gmnal_cb_send_pages(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, unsigned int kniov, ptl_kiov_t *kiov, size_t len)
-ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len)
--{
--
-- int i = 0;
-- gmnal_data_t *nal_data;
-- struct iovec *iovec = NULL, *iovec_dup = NULL;
-- ptl_kiov_t *kiov_dup = kiov;
--
- CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
- nal_data = nal_cb->nal_data;
- CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset["LPSZ"] len["LPSZ"]\n",
- nid, kniov, offset, len);
- nal_data = libnal->libnal_data;
-- PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
-- iovec_dup = iovec;
-- if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
-- CDEBUG(D_INFO, "This is a small message send\n");
--
-- for (i=0; i<kniov; i++) {
-- CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
-- CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-- kiov->kiov_page, kiov->kiov_len,
-- kiov->kiov_offset);
--
-- iovec->iov_base = kmap(kiov->kiov_page)
-- + kiov->kiov_offset;
--
-- iovec->iov_len = kiov->kiov_len;
-- iovec++;
-- kiov++;
-- }
- gmnal_small_tx(nal_cb, private, cookie, hdr, type, nid,
- pid, kniov, iovec_dup, len);
- gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
- pid, kniov, iovec_dup, offset, len);
-- } else {
-- CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
-- return(PTL_FAIL);
-- for (i=0; i<kniov; i++) {
-- CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i, kiov);
-- CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-- kiov->kiov_page, kiov->kiov_len,
-- kiov->kiov_offset);
--
-- iovec->iov_base = kmap(kiov->kiov_page)
-- + kiov->kiov_offset;
-- iovec->iov_len = kiov->kiov_len;
-- iovec++;
-- kiov++;
-- }
- gmnal_large_tx(nal_cb, private, cookie, hdr, type, nid,
- pid, kniov, iovec, len);
- gmnal_large_tx(libnal, private, cookie, hdr, type, nid,
- pid, kniov, iovec, offset, len);
-- }
-- for (i=0; i<kniov; i++) {
-- kunmap(kiov_dup->kiov_page);
-- kiov_dup++;
-- }
-- PORTAL_FREE(iovec_dup, kniov*sizeof(struct iovec));
- return(PTL_OK);
- }
-
- int gmnal_cb_read(nal_cb_t *nal_cb, void *private, void *dst,
- user_ptr src, size_t len)
- {
- gm_bcopy(src, dst, len);
- return(PTL_OK);
- }
-
- int gmnal_cb_write(nal_cb_t *nal_cb, void *private, user_ptr dst,
- void *src, size_t len)
- {
- gm_bcopy(src, dst, len);
- return(PTL_OK);
- }
-
- int gmnal_cb_callback(nal_cb_t *nal_cb, void *private, lib_eq_t *eq,
- ptl_event_t *ev)
- {
-
- if (eq->event_callback != NULL) {
- CDEBUG(D_INFO, "found callback\n");
- eq->event_callback(ev);
- }
-
- return(PTL_OK);
- }
-
- void *gmnal_cb_malloc(nal_cb_t *nal_cb, size_t len)
- {
- void *ptr = NULL;
- CDEBUG(D_TRACE, "gmnal_cb_malloc len["LPSZ"]\n", len);
- PORTAL_ALLOC(ptr, len);
- return(ptr);
- }
-
- void gmnal_cb_free(nal_cb_t *nal_cb, void *buf, size_t len)
- {
- CDEBUG(D_TRACE, "gmnal_cb_free :: buf[%p] len["LPSZ"]\n", buf, len);
- PORTAL_FREE(buf, len);
- return;
- }
-
- void gmnal_cb_unmap(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov,
- void **addrkey)
- {
- return;
- }
-
- int gmnal_cb_map(nal_cb_t *nal_cb, unsigned int niov, struct iovec *iov,
- void**addrkey)
- {
-- return(PTL_OK);
- }
-
- void gmnal_cb_printf(nal_cb_t *nal_cb, const char *fmt, ...)
- {
- CDEBUG(D_TRACE, "gmnal_cb_printf\n");
- printk(fmt);
- return;
- }
-
- void gmnal_cb_cli(nal_cb_t *nal_cb, unsigned long *flags)
- {
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
- spin_lock_irqsave(&nal_data->cb_lock, *flags);
- return;
- }
-
- void gmnal_cb_sti(nal_cb_t *nal_cb, unsigned long *flags)
- {
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
-
- spin_unlock_irqrestore(&nal_data->cb_lock, *flags);
- return;
--}
--
- int gmnal_cb_dist(nal_cb_t *nal_cb, ptl_nid_t nid, unsigned long *dist)
-int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
--{
-- CDEBUG(D_TRACE, "gmnal_cb_dist\n");
-- if (dist)
-- *dist = 27;
-- return(PTL_OK);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/*
-- * This file contains all gmnal send and receive functions
-- */
--
--#include "gmnal.h"
--
--/*
-- * The caretaker thread
-- * This is main thread of execution for the NAL side
-- * This guy waits in gm_blocking_recvive and gets
-- * woken up when the myrinet adaptor gets an interrupt.
-- * Hands off receive operations to the receive thread
-- * This thread Looks after gm_callbacks etc inline.
-- */
--int
--gmnal_ct_thread(void *arg)
--{
-- gmnal_data_t *nal_data;
-- gm_recv_event_t *rxevent = NULL;
-- gm_recv_t *recv = NULL;
--
-- if (!arg) {
-- CDEBUG(D_TRACE, "NO nal_data. Exiting\n");
-- return(-1);
-- }
--
-- nal_data = (gmnal_data_t*)arg;
-- CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
--
-- daemonize();
--
-- nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED;
--
-- GMNAL_GM_LOCK(nal_data);
-- while(nal_data->ctthread_flag == GMNAL_CTTHREAD_STARTED) {
-- CDEBUG(D_NET, "waiting\n");
-- rxevent = gm_blocking_receive_no_spin(nal_data->gm_port);
-- if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) {
-- CDEBUG(D_INFO, "time to exit\n");
-- break;
-- }
-- CDEBUG(D_INFO, "got [%s]\n", gmnal_rxevent(rxevent));
-- switch (GM_RECV_EVENT_TYPE(rxevent)) {
--
-- case(GM_RECV_EVENT):
-- CDEBUG(D_NET, "CTTHREAD:: GM_RECV_EVENT\n");
-- recv = (gm_recv_t*)&rxevent->recv;
-- GMNAL_GM_UNLOCK(nal_data);
-- gmnal_add_rxtwe(nal_data, recv);
-- GMNAL_GM_LOCK(nal_data);
-- CDEBUG(D_NET, "CTTHREAD:: Added event to Q\n");
-- break;
-- case(_GM_SLEEP_EVENT):
-- /*
-- * Blocking receive above just returns
-- * immediatly with _GM_SLEEP_EVENT
-- * Don't know what this is
-- */
-- CDEBUG(D_NET, "Sleeping in gm_unknown\n");
-- GMNAL_GM_UNLOCK(nal_data);
-- gm_unknown(nal_data->gm_port, rxevent);
-- GMNAL_GM_LOCK(nal_data);
-- CDEBUG(D_INFO, "Awake from gm_unknown\n");
-- break;
--
-- default:
-- /*
-- * Don't know what this is
-- * gm_unknown will make sense of it
-- * Should be able to do something with
-- * FAST_RECV_EVENTS here.
-- */
-- CDEBUG(D_NET, "Passing event to gm_unknown\n");
-- GMNAL_GM_UNLOCK(nal_data);
-- gm_unknown(nal_data->gm_port, rxevent);
-- GMNAL_GM_LOCK(nal_data);
-- CDEBUG(D_INFO, "Processed unknown event\n");
-- }
-- }
-- GMNAL_GM_UNLOCK(nal_data);
-- nal_data->ctthread_flag = GMNAL_THREAD_RESET;
-- CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data);
-- return(GMNAL_STATUS_OK);
--}
--
--
--/*
-- * process a receive event
-- */
--int gmnal_rx_thread(void *arg)
--{
-- gmnal_data_t *nal_data;
-- void *buffer;
-- gmnal_rxtwe_t *we = NULL;
--
-- if (!arg) {
-- CDEBUG(D_TRACE, "NO nal_data. Exiting\n");
-- return(-1);
-- }
--
-- nal_data = (gmnal_data_t*)arg;
-- CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
--
-- daemonize();
-- /*
-- * set 1 bit for each thread started
-- * doesn't matter which bit
-- */
-- spin_lock(&nal_data->rxthread_flag_lock);
-- if (nal_data->rxthread_flag)
-- nal_data->rxthread_flag=nal_data->rxthread_flag*2 + 1;
-- else
-- nal_data->rxthread_flag = 1;
-- CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag);
-- spin_unlock(&nal_data->rxthread_flag_lock);
--
-- while(nal_data->rxthread_stop_flag != GMNAL_THREAD_STOP) {
-- CDEBUG(D_NET, "RXTHREAD:: Receive thread waiting\n");
-- we = gmnal_get_rxtwe(nal_data);
-- if (!we) {
-- CDEBUG(D_INFO, "Receive thread time to exit\n");
-- break;
-- }
--
-- buffer = we->buffer;
-- switch(((gmnal_msghdr_t*)buffer)->type) {
-- case(GMNAL_SMALL_MESSAGE):
-- gmnal_pre_receive(nal_data, we,
-- GMNAL_SMALL_MESSAGE);
-- break;
-- case(GMNAL_LARGE_MESSAGE_INIT):
-- gmnal_pre_receive(nal_data, we,
-- GMNAL_LARGE_MESSAGE_INIT);
-- break;
-- case(GMNAL_LARGE_MESSAGE_ACK):
-- gmnal_pre_receive(nal_data, we,
-- GMNAL_LARGE_MESSAGE_ACK);
-- break;
-- default:
-- CDEBUG(D_ERROR, "Unsupported message type\n");
-- gmnal_rx_bad(nal_data, we, NULL);
-- }
-- PORTAL_FREE(we, sizeof(gmnal_rxtwe_t));
-- }
--
-- spin_lock(&nal_data->rxthread_flag_lock);
-- nal_data->rxthread_flag/=2;
-- CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag);
-- spin_unlock(&nal_data->rxthread_flag_lock);
-- CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data);
-- return(GMNAL_STATUS_OK);
--}
--
--
--
--/*
-- * Start processing a small message receive
-- * Get here from gmnal_receive_thread
-- * Hand off to lib_parse, which calls cb_recv
-- * which hands back to gmnal_small_receive
-- * Deal with all endian stuff here.
-- */
--int
--gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
--{
-- gmnal_srxd_t *srxd = NULL;
-- void *buffer = NULL;
-- unsigned int snode, sport, type, length;
-- gmnal_msghdr_t *gmnal_msghdr;
-- ptl_hdr_t *portals_hdr;
- int rc;
--
-- CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n",
-- nal_data, we, gmnal_type);
--
-- buffer = we->buffer;
-- snode = we->snode;
-- sport = we->sport;
-- type = we->type;
-- buffer = we->buffer;
-- length = we->length;
--
-- gmnal_msghdr = (gmnal_msghdr_t*)buffer;
-- portals_hdr = (ptl_hdr_t*)(buffer+GMNAL_MSGHDR_SIZE);
--
-- CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
-- "type [%d], length [%d], buffer [%p]\n",
-- snode, sport, type, length, buffer);
-- CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
-- "gmnal_type [%d]\n", gmnal_msghdr->sender_node_id,
-- gmnal_msghdr->magic, gmnal_msghdr->type);
-- CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
-- "dest_node ["LPD64"]\n", portals_hdr->src_nid,
-- portals_hdr->dest_nid);
--
--
-- /*
-- * Get a receive descriptor for this message
-- */
-- srxd = gmnal_rxbuffer_to_srxd(nal_data, buffer);
-- CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
- srxd->nal_data = nal_data;
-- if (!srxd) {
-- CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
- lib_parse(nal_data->nal_cb, portals_hdr, srxd);
- /* I think passing a NULL srxd to lib_parse will crash
- * gmnal_recv() */
- LBUG();
- lib_parse(nal_data->libnal, portals_hdr, srxd);
-- return(GMNAL_STATUS_FAIL);
-- }
--
-- /*
-- * no need to bother portals library with this
-- */
-- if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) {
-- gmnal_large_tx_ack_received(nal_data, srxd);
-- return(GMNAL_STATUS_OK);
-- }
--
- srxd->nal_data = nal_data;
-- srxd->type = gmnal_type;
-- srxd->nsiov = gmnal_msghdr->niov;
-- srxd->gm_source_node = gmnal_msghdr->sender_node_id;
--
-- CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n",
-- buffer+GMNAL_MSGHDR_SIZE);
-- /*
-- * control passes to lib, which calls cb_recv
-- * cb_recv is responsible for returning the buffer
-- * for future receive
-- */
- lib_parse(nal_data->nal_cb, portals_hdr, srxd);
- rc = lib_parse(nal_data->libnal, portals_hdr, srxd);
-
- if (rc != PTL_OK) {
- /* I just received garbage; take appropriate action... */
- LBUG();
- }
--
-- return(GMNAL_STATUS_OK);
--}
--
--
--
--/*
-- * After a receive has been processed,
-- * hang out the receive buffer again.
-- * This implicitly returns a receive token.
-- */
--int
--gmnal_rx_requeue_buffer(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
--{
-- CDEBUG(D_TRACE, "gmnal_rx_requeue_buffer\n");
--
-- CDEBUG(D_NET, "requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data);
--
-- GMNAL_GM_LOCK(nal_data);
-- gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
-- srxd->gmsize, GM_LOW_PRIORITY, 0 );
-- GMNAL_GM_UNLOCK(nal_data);
--
-- return(GMNAL_STATUS_OK);
--}
--
--
--/*
-- * Handle a bad message
-- * A bad message is one we don't expect or can't interpret
-- */
--int
--gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd)
--{
-- CDEBUG(D_TRACE, "Can't handle message\n");
--
-- if (!srxd)
-- srxd = gmnal_rxbuffer_to_srxd(nal_data,
-- we->buffer);
-- if (srxd) {
-- gmnal_rx_requeue_buffer(nal_data, srxd);
-- } else {
-- CDEBUG(D_ERROR, "Can't find a descriptor for this buffer\n");
-- /*
-- * get rid of it ?
-- */
-- return(GMNAL_STATUS_FAIL);
-- }
--
-- return(GMNAL_STATUS_OK);
--}
--
--
--
--/*
-- * Process a small message receive.
-- * Get here from gmnal_receive_thread, gmnal_pre_receive
-- * lib_parse, cb_recv
-- * Put data from prewired receive buffer into users buffer(s)
-- * Hang out the receive buffer again for another receive
-- * Call lib_finalize
-- */
--int
- gmnal_small_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
-gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen)
--{
-- gmnal_srxd_t *srxd = NULL;
-- void *buffer = NULL;
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
- gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
--
--
-- CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
--
-- if (!private) {
-- CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
- lib_finalize(libnal, private, cookie, PTL_FAIL);
-- return(PTL_FAIL);
-- }
--
-- srxd = (gmnal_srxd_t*)private;
-- buffer = srxd->buffer;
-- buffer += sizeof(gmnal_msghdr_t);
-- buffer += sizeof(ptl_hdr_t);
--
-- while(niov--) {
- CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
- iov->iov_len);
- gm_bcopy(buffer, iov->iov_base, iov->iov_len);
- buffer += iov->iov_len;
- iov++;
- if (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- } else if (offset > 0) {
- CDEBUG(D_INFO, "processing [%p] base [%p] len %d, "
- "offset %d, len ["LPSZ"]\n", iov,
- iov->iov_base + offset, iov->iov_len, offset,
- iov->iov_len - offset);
- gm_bcopy(buffer, iov->iov_base + offset,
- iov->iov_len - offset);
- offset = 0;
- buffer += iov->iov_len - offset;
- } else {
- CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
- iov->iov_len);
- gm_bcopy(buffer, iov->iov_base, iov->iov_len);
- buffer += iov->iov_len;
- }
- iov++;
-- }
--
--
-- /*
-- * let portals library know receive is complete
-- */
-- CDEBUG(D_PORTALS, "calling lib_finalize\n");
- lib_finalize(nal_cb, private, cookie, PTL_OK);
- lib_finalize(libnal, private, cookie, PTL_OK);
-- /*
-- * return buffer so it can be used again
-- */
-- CDEBUG(D_NET, "calling gm_provide_receive_buffer\n");
-- GMNAL_GM_LOCK(nal_data);
-- gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
-- srxd->gmsize, GM_LOW_PRIORITY, 0);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- return(PTL_OK);
--}
--
--
--/*
-- * Start a small transmit.
-- * Get a send token (and wired transmit buffer).
-- * Copy data from senders buffer to wired buffer and
-- * initiate gm_send from the wired buffer.
-- * The callback function informs when the send is complete.
-- */
--int
- gmnal_small_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
-gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
-- ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, int size)
- unsigned int niov, struct iovec *iov, size_t offset, int size)
--{
- gmnal_data_t *nal_data = (gmnal_data_t*)nal_cb->nal_data;
- gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
-- gmnal_stxd_t *stxd = NULL;
-- void *buffer = NULL;
-- gmnal_msghdr_t *msghdr = NULL;
-- int tot_size = 0;
-- unsigned int local_nid;
-- gm_status_t gm_status = GM_SUCCESS;
--
- CDEBUG(D_TRACE, "gmnal_small_tx nal_cb [%p] private [%p] cookie [%p] "
- CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
-- "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
- "iov [%p] size [%d]\n", nal_cb, private, cookie, hdr, type,
- "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type,
-- global_nid, pid, niov, iov, size);
--
-- CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
-- hdr->dest_nid, hdr->src_nid);
--
-- if (!nal_data) {
-- CDEBUG(D_ERROR, "no nal_data\n");
-- return(GMNAL_STATUS_FAIL);
-- } else {
-- CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
-- }
--
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid,
-- &local_nid);
-- GMNAL_GM_UNLOCK(nal_data);
-- if (gm_status != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "Failed to obtain local id\n");
-- return(GMNAL_STATUS_FAIL);
-- }
-- CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
--
-- stxd = gmnal_get_stxd(nal_data, 1);
-- CDEBUG(D_INFO, "stxd [%p]\n", stxd);
--
-- stxd->type = GMNAL_SMALL_MESSAGE;
-- stxd->cookie = cookie;
--
-- /*
-- * Copy gmnal_msg_hdr and portals header to the transmit buffer
-- * Then copy the data in
-- */
-- buffer = stxd->buffer;
-- msghdr = (gmnal_msghdr_t*)buffer;
--
-- msghdr->magic = GMNAL_MAGIC;
-- msghdr->type = GMNAL_SMALL_MESSAGE;
-- msghdr->sender_node_id = nal_data->gm_global_nid;
-- CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer);
--
-- buffer += sizeof(gmnal_msghdr_t);
--
-- CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer);
-- gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t));
--
-- buffer += sizeof(ptl_hdr_t);
--
-- while(niov--) {
- CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
- iov, iov->iov_len, buffer);
- gm_bcopy(iov->iov_base, buffer, iov->iov_len);
- buffer+= iov->iov_len;
- iov++;
- if (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- } else if (offset > 0) {
- CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n",
- iov, iov->iov_base + offset, iov->iov_len - offset, buffer);
- gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset);
- buffer+= iov->iov_len - offset;
- offset = 0;
- } else {
- CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
- iov, iov->iov_len, buffer);
- gm_bcopy(iov->iov_base, buffer, iov->iov_len);
- buffer+= iov->iov_len;
- }
- iov++;
-- }
--
-- CDEBUG(D_INFO, "sending\n");
-- tot_size = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t);
-- stxd->msg_size = tot_size;
--
--
-- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
-- "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
-- "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
-- stxd->msg_size, global_nid, local_nid, stxd);
--
-- GMNAL_GM_LOCK(nal_data);
-- stxd->gm_priority = GM_LOW_PRIORITY;
-- stxd->gm_target_node = local_nid;
-- gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer,
-- stxd->gm_size, stxd->msg_size,
-- GM_LOW_PRIORITY, local_nid,
-- gmnal_small_tx_callback, (void*)stxd);
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "done\n");
--
-- return(PTL_OK);
--}
--
--
--/*
-- * A callback to indicate the small transmit operation is compete
-- * Check for erros and try to deal with them.
-- * Call lib_finalise to inform the client application that the send
-- * is complete and the memory can be reused.
-- * Return the stxd when finished with it (returns a send token)
-- */
--void
--gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
--{
-- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
-- lib_msg_t *cookie = stxd->cookie;
-- gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data;
- nal_cb_t *nal_cb = nal_data->nal_cb;
- lib_nal_t *libnal = nal_data->libnal;
--
-- if (!stxd) {
-- CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
-- return;
-- }
-- if (status != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n",
-- stxd, gmnal_gm_error(status));
-- }
--
-- switch(status) {
-- case(GM_SUCCESS):
-- break;
--
--
--
-- case(GM_SEND_DROPPED):
-- /*
-- * do a resend on the dropped ones
-- */
-- CDEBUG(D_ERROR, "send stxd [%p] was dropped "
-- "resending\n", context);
-- GMNAL_GM_LOCK(nal_data);
-- gm_send_to_peer_with_callback(nal_data->gm_port,
-- stxd->buffer,
-- stxd->gm_size,
-- stxd->msg_size,
-- stxd->gm_priority,
-- stxd->gm_target_node,
-- gmnal_small_tx_callback,
-- context);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- return;
-- case(GM_TIMED_OUT):
-- case(GM_SEND_TIMED_OUT):
-- /*
-- * drop these ones
-- */
-- CDEBUG(D_INFO, "calling gm_drop_sends\n");
-- GMNAL_GM_LOCK(nal_data);
-- gm_drop_sends(nal_data->gm_port, stxd->gm_priority,
-- stxd->gm_target_node, GMNAL_GM_PORT,
-- gmnal_drop_sends_callback, context);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- return;
--
--
-- /*
-- * abort on these ?
-- */
-- case(GM_TRY_AGAIN):
-- case(GM_INTERRUPTED):
-- case(GM_FAILURE):
-- case(GM_INPUT_BUFFER_TOO_SMALL):
-- case(GM_OUTPUT_BUFFER_TOO_SMALL):
-- case(GM_BUSY):
-- case(GM_MEMORY_FAULT):
-- case(GM_INVALID_PARAMETER):
-- case(GM_OUT_OF_MEMORY):
-- case(GM_INVALID_COMMAND):
-- case(GM_PERMISSION_DENIED):
-- case(GM_INTERNAL_ERROR):
-- case(GM_UNATTACHED):
-- case(GM_UNSUPPORTED_DEVICE):
-- case(GM_SEND_REJECTED):
-- case(GM_SEND_TARGET_PORT_CLOSED):
-- case(GM_SEND_TARGET_NODE_UNREACHABLE):
-- case(GM_SEND_PORT_CLOSED):
-- case(GM_NODE_ID_NOT_YET_SET):
-- case(GM_STILL_SHUTTING_DOWN):
-- case(GM_CLONE_BUSY):
-- case(GM_NO_SUCH_DEVICE):
-- case(GM_ABORTED):
-- case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-- case(GM_UNTRANSLATED_SYSTEM_ERROR):
-- case(GM_ACCESS_DENIED):
-- case(GM_NO_DRIVER_SUPPORT):
-- case(GM_PTE_REF_CNT_OVERFLOW):
-- case(GM_NOT_SUPPORTED_IN_KERNEL):
-- case(GM_NOT_SUPPORTED_ON_ARCH):
-- case(GM_NO_MATCH):
-- case(GM_USER_ERROR):
-- case(GM_DATA_CORRUPTED):
-- case(GM_HARDWARE_FAULT):
-- case(GM_SEND_ORPHANED):
-- case(GM_MINOR_OVERFLOW):
-- case(GM_PAGE_TABLE_FULL):
-- case(GM_UC_ERROR):
-- case(GM_INVALID_PORT_NUMBER):
-- case(GM_DEV_NOT_FOUND):
-- case(GM_FIRMWARE_NOT_RUNNING):
-- case(GM_YP_NO_MATCH):
-- default:
-- CDEBUG(D_ERROR, "Unknown send error\n");
-- gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
-- stxd->gm_target_node, GMNAL_GM_PORT,
-- gmnal_resume_sending_callback, context);
-- return;
--
-- }
--
-- /*
-- * TO DO
-- * If this is a large message init,
-- * we're not finished with the data yet,
-- * so can't call lib_finalise.
-- * However, we're also holding on to a
-- * stxd here (to keep track of the source
-- * iovec only). Should use another structure
-- * to keep track of iovec and return stxd to
-- * free list earlier.
-- */
-- if (stxd->type == GMNAL_LARGE_MESSAGE_INIT) {
-- CDEBUG(D_INFO, "large transmit done\n");
-- return;
-- }
-- gmnal_return_stxd(nal_data, stxd);
- lib_finalize(nal_cb, stxd, cookie, PTL_OK);
- lib_finalize(libnal, stxd, cookie, PTL_OK);
-- return;
--}
--
--/*
-- * After an error on the port
-- * call this to allow future sends to complete
-- */
--void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
-- gm_status_t status)
--{
-- gmnal_data_t *nal_data;
-- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
-- CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
-- gmnal_return_stxd(stxd->nal_data, stxd);
-- return;
--}
--
--
--void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context,
-- gm_status_t status)
--{
-- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
-- gmnal_data_t *nal_data = stxd->nal_data;
--
-- CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
-- if (status == GM_SUCCESS) {
-- GMNAL_GM_LOCK(nal_data);
-- gm_send_to_peer_with_callback(gm_port, stxd->buffer,
-- stxd->gm_size, stxd->msg_size,
-- stxd->gm_priority,
-- stxd->gm_target_node,
-- gmnal_small_tx_callback,
-- context);
-- GMNAL_GM_LOCK(nal_data);
-- } else {
-- CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
-- "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
-- }
--
--
-- return;
--}
--
--
--/*
-- * Begine a large transmit.
-- * Do a gm_register of the memory pointed to by the iovec
-- * and send details to the receiver. The receiver does a gm_get
-- * to pull the data and sends and ack when finished. Upon receipt of
-- * this ack, deregister the memory. Only 1 send token is required here.
-- */
--int
- gmnal_large_tx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
-gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
-- ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, int size)
- unsigned int niov, struct iovec *iov, size_t offset, int size)
--{
--
-- gmnal_data_t *nal_data;
-- gmnal_stxd_t *stxd = NULL;
-- void *buffer = NULL;
-- gmnal_msghdr_t *msghdr = NULL;
-- unsigned int local_nid;
-- int mlen = 0; /* the size of the init message data */
-- struct iovec *iov_dup = NULL;
-- gm_status_t gm_status;
-- int niov_dup;
--
--
- CDEBUG(D_TRACE, "gmnal_large_tx nal_cb [%p] private [%p], cookie [%p] "
- CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] "
-- "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
- "iov [%p], size [%d]\n", nal_cb, private, cookie, hdr, type,
- "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type,
-- global_nid, pid, niov, iov, size);
--
- if (nal_cb)
- nal_data = (gmnal_data_t*)nal_cb->nal_data;
- if (libnal)
- nal_data = (gmnal_data_t*)libnal->libnal_data;
-- else {
- CDEBUG(D_ERROR, "no nal_cb.\n");
- CDEBUG(D_ERROR, "no libnal.\n");
-- return(GMNAL_STATUS_FAIL);
-- }
--
--
-- /*
-- * Get stxd and buffer. Put local address of data in buffer,
-- * send local addresses to target,
-- * wait for the target node to suck the data over.
-- * The stxd is used to ren
-- */
-- stxd = gmnal_get_stxd(nal_data, 1);
-- CDEBUG(D_INFO, "stxd [%p]\n", stxd);
--
-- stxd->type = GMNAL_LARGE_MESSAGE_INIT;
-- stxd->cookie = cookie;
--
-- /*
-- * Copy gmnal_msg_hdr and portals header to the transmit buffer
-- * Then copy the iov in
-- */
-- buffer = stxd->buffer;
-- msghdr = (gmnal_msghdr_t*)buffer;
--
-- CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer);
--
-- msghdr->magic = GMNAL_MAGIC;
-- msghdr->type = GMNAL_LARGE_MESSAGE_INIT;
-- msghdr->sender_node_id = nal_data->gm_global_nid;
-- msghdr->stxd = stxd;
-- msghdr->niov = niov ;
-- buffer += sizeof(gmnal_msghdr_t);
-- mlen = sizeof(gmnal_msghdr_t);
-- CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
--
--
-- CDEBUG(D_INFO, "processing portals hdr at [%p]\n", buffer);
--
-- gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t));
-- buffer += sizeof(ptl_hdr_t);
-- mlen += sizeof(ptl_hdr_t);
-- CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
-
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- niov--;
- iov++;
- }
-
- LASSERT(offset >= 0);
- /*
- * Store the iovs in the stxd for we can get
- * them later if we need them
- */
- stxd->iov[0].iov_base = iov->iov_base + offset;
- stxd->iov[0].iov_len = iov->iov_len - offset;
- CDEBUG(D_NET, "Copying iov [%p] to [%p], niov=%d\n", iov, stxd->iov, niov);
- if (niov > 1)
- gm_bcopy(&iov[1], &stxd->iov[1], (niov-1)*sizeof(struct iovec));
- stxd->niov = niov;
--
-- /*
-- * copy the iov to the buffer so target knows
-- * where to get the data from
-- */
-- CDEBUG(D_INFO, "processing iov to [%p]\n", buffer);
- gm_bcopy(iov, buffer, niov*sizeof(struct iovec));
- mlen += niov*(sizeof(struct iovec));
- gm_bcopy(stxd->iov, buffer, stxd->niov*sizeof(struct iovec));
- mlen += stxd->niov*(sizeof(struct iovec));
-- CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
-
-
- /*
- * Store the iovs in the stxd for we can get
- * them later if we need them
- */
- CDEBUG(D_NET, "Copying iov [%p] to [%p]\n", iov, stxd->iov);
- gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec));
- stxd->niov = niov;
--
-
-- /*
-- * register the memory so the NIC can get hold of the data
-- * This is a slow process. it'd be good to overlap it
-- * with something else.
-- */
- iov = stxd->iov;
-- iov_dup = iov;
-- niov_dup = niov;
-- while(niov--) {
-- CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n",
-- iov->iov_base, iov->iov_len);
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_register_memory(nal_data->gm_port,
-- iov->iov_base, iov->iov_len);
-- if (gm_status != GM_SUCCESS) {
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
-- "for memory [%p] len ["LPSZ"]\n",
-- gm_status, gmnal_gm_error(gm_status),
-- iov->iov_base, iov->iov_len);
-- GMNAL_GM_LOCK(nal_data);
-- while (iov_dup != iov) {
-- gm_deregister_memory(nal_data->gm_port,
-- iov_dup->iov_base,
-- iov_dup->iov_len);
-- iov_dup++;
-- }
-- GMNAL_GM_UNLOCK(nal_data);
-- gmnal_return_stxd(nal_data, stxd);
-- return(PTL_FAIL);
-- }
--
-- GMNAL_GM_UNLOCK(nal_data);
-- iov++;
-- }
--
-- /*
-- * Send the init message to the target
-- */
-- CDEBUG(D_INFO, "sending mlen [%d]\n", mlen);
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid,
-- &local_nid);
-- if (gm_status != GM_SUCCESS) {
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_ERROR, "Failed to obtain local id\n");
-- gmnal_return_stxd(nal_data, stxd);
-- /* TO DO deregister memory on failure */
-- return(GMNAL_STATUS_FAIL);
-- }
-- CDEBUG(D_INFO, "Local Node_id is [%d]\n", local_nid);
-- gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer,
-- stxd->gm_size, mlen, GM_LOW_PRIORITY,
-- local_nid, gmnal_large_tx_callback,
-- (void*)stxd);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- CDEBUG(D_INFO, "done\n");
--
-- return(PTL_OK);
--}
--
--/*
-- * Callback function indicates that send of buffer with
-- * large message iovec has completed (or failed).
-- */
--void
--gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
--{
-- gmnal_small_tx_callback(gm_port, context, status);
--
--}
--
--
--
--/*
-- * Have received a buffer that contains an iovec of the sender.
-- * Do a gm_register_memory of the receivers buffer and then do a get
-- * data from the sender.
-- */
--int
- gmnal_large_rx(nal_cb_t *nal_cb, void *private, lib_msg_t *cookie,
- unsigned int nriov, struct iovec *riov, size_t mlen,
- size_t rlen)
-gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int nriov, struct iovec *riov, size_t offset,
- size_t mlen, size_t rlen)
--{
- gmnal_data_t *nal_data = nal_cb->nal_data;
- gmnal_data_t *nal_data = libnal->libnal_data;
-- gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
-- void *buffer = NULL;
-- struct iovec *riov_dup;
-- int nriov_dup;
-- gmnal_msghdr_t *msghdr = NULL;
-- gm_status_t gm_status;
--
- CDEBUG(D_TRACE, "gmnal_large_rx :: nal_cb[%p], private[%p], "
- CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], "
-- "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- nal_cb, private, cookie, nriov, riov, mlen, rlen);
- libnal, private, cookie, nriov, riov, mlen, rlen);
--
-- if (!srxd) {
-- CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
- lib_finalize(nal_cb, private, cookie, PTL_FAIL);
- lib_finalize(libnal, private, cookie, PTL_FAIL);
-- return(PTL_FAIL);
-- }
--
-- buffer = srxd->buffer;
-- msghdr = (gmnal_msghdr_t*)buffer;
-- buffer += sizeof(gmnal_msghdr_t);
-- buffer += sizeof(ptl_hdr_t);
--
-- /*
-- * Store the senders stxd address in the srxd for this message
-- * The gmnal_large_message_ack needs it to notify the sender
-- * the pull of data is complete
-- */
-- srxd->source_stxd = msghdr->stxd;
--
-- /*
-- * Register the receivers memory
-- * get the data,
-- * tell the sender that we got the data
-- * then tell the receiver we got the data
-- * TO DO
-- * If the iovecs match, could interleave
-- * gm_registers and gm_gets for each element
- */
- while (offset >= riov->iov_len) {
- offset -= riov->iov_len;
- riov++;
- nriov--;
- }
- LASSERT (nriov >= 0);
- LASSERT (offset >= 0);
- /*
- * do this so the final gm_get callback can deregister the memory
-- */
- PORTAL_ALLOC(srxd->riov, nriov*(sizeof(struct iovec)));
-
- srxd->riov[0].iov_base = riov->iov_base + offset;
- srxd->riov[0].iov_len = riov->iov_len - offset;
- if (nriov > 1)
- gm_bcopy(&riov[1], &srxd->riov[1], (nriov-1)*(sizeof(struct iovec)));
- srxd->nriov = nriov;
-
- riov = srxd->riov;
-- nriov_dup = nriov;
-- riov_dup = riov;
-- while(nriov--) {
-- CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n",
-- riov->iov_base, riov->iov_len);
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_register_memory(nal_data->gm_port,
-- riov->iov_base, riov->iov_len);
-- if (gm_status != GM_SUCCESS) {
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
-- "for memory [%p] len ["LPSZ"]\n",
-- gm_status, gmnal_gm_error(gm_status),
-- riov->iov_base, riov->iov_len);
-- GMNAL_GM_LOCK(nal_data);
-- while (riov_dup != riov) {
-- gm_deregister_memory(nal_data->gm_port,
-- riov_dup->iov_base,
-- riov_dup->iov_len);
-- riov_dup++;
-- }
-- GMNAL_GM_LOCK(nal_data);
-- /*
-- * give back srxd and buffer. Send NACK to sender
-- */
- PORTAL_FREE(srxd->riov, nriov_dup*(sizeof(struct iovec)));
-- return(PTL_FAIL);
-- }
-- GMNAL_GM_UNLOCK(nal_data);
-- riov++;
-- }
- /*
- * do this so the final gm_get callback can deregister the memory
- */
- PORTAL_ALLOC(srxd->riov, nriov_dup*(sizeof(struct iovec)));
- gm_bcopy(riov_dup, srxd->riov, nriov_dup*(sizeof(struct iovec)));
- srxd->nriov = nriov_dup;
--
-- /*
-- * now do gm_get to get the data
-- */
-- srxd->cookie = cookie;
-- if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer,
-- nriov_dup, riov_dup) != GMNAL_STATUS_OK) {
-- CDEBUG(D_ERROR, "can't get the data");
-- }
--
-- CDEBUG(D_INFO, "lgmanl_large_rx done\n");
--
-- return(PTL_OK);
--}
--
--
--/*
-- * Perform a number of remote gets as part of receiving
-- * a large message.
-- * The final one to complete (i.e. the last callback to get called)
-- * tidies up.
-- * gm_get requires a send token.
-- */
--int
--gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov,
-- int nriov, struct iovec *riov)
--{
--
-- int ncalls = 0;
--
-- CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
-- "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
--
--
-- ncalls = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
-- if (ncalls < 0) {
-- CDEBUG(D_ERROR, "there's something wrong with the iovecs\n");
-- return(GMNAL_STATUS_FAIL);
-- }
-- CDEBUG(D_INFO, "gmnal_remote_get ncalls [%d]\n", ncalls);
-- spin_lock_init(&srxd->callback_lock);
-- srxd->ncallbacks = ncalls;
-- srxd->callback_status = 0;
--
-- ncalls = gmnal_copyiov(1, srxd, nsiov, siov, nriov, riov);
-- if (ncalls < 0) {
-- CDEBUG(D_ERROR, "there's something wrong with the iovecs\n");
-- return(GMNAL_STATUS_FAIL);
-- }
--
-- return(GMNAL_STATUS_OK);
--
--}
--
--
--/*
-- * pull data from source node (source iovec) to a local iovec.
-- * The iovecs may not match which adds the complications below.
-- * Count the number of gm_gets that will be required to the callbacks
-- * can determine who is the last one.
-- */
--int
--gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov,
-- struct iovec *siov, int nriov, struct iovec *riov)
--{
--
-- int ncalls = 0;
-- int slen = siov->iov_len, rlen = riov->iov_len;
-- char *sbuf = siov->iov_base, *rbuf = riov->iov_base;
-- unsigned long sbuf_long;
-- gm_remote_ptr_t remote_ptr = 0;
-- unsigned int source_node;
-- gmnal_ltxd_t *ltxd = NULL;
-- gmnal_data_t *nal_data = srxd->nal_data;
--
-- CDEBUG(D_TRACE, "copy[%d] nal_data[%p]\n", do_copy, nal_data);
-- if (do_copy) {
-- if (!nal_data) {
-- CDEBUG(D_ERROR, "Bad args No nal_data\n");
-- return(GMNAL_STATUS_FAIL);
-- }
-- GMNAL_GM_LOCK(nal_data);
-- if (gm_global_id_to_node_id(nal_data->gm_port,
-- srxd->gm_source_node,
-- &source_node) != GM_SUCCESS) {
--
-- CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
-- "to local node_id\n", srxd->gm_source_node);
-- GMNAL_GM_UNLOCK(nal_data);
-- return(GMNAL_STATUS_FAIL);
-- }
-- GMNAL_GM_UNLOCK(nal_data);
-- /*
-- * We need a send token to use gm_get
-- * getting an stxd gets us a send token.
-- * the stxd is used as the context to the
-- * callback function (so stxd can be returned).
-- * Set pointer in stxd to srxd so callback count in srxd
-- * can be decremented to find last callback to complete
-- */
-- CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n",
-- srxd->gm_source_node, source_node);
-- }
--
-- do {
-- CDEBUG(D_INFO, "sbuf[%p] slen[%d] rbuf[%p], rlen[%d]\n",
-- sbuf, slen, rbuf, rlen);
-- if (slen > rlen) {
-- ncalls++;
-- if (do_copy) {
-- CDEBUG(D_INFO, "slen>rlen\n");
-- ltxd = gmnal_get_ltxd(nal_data);
-- ltxd->srxd = srxd;
-- GMNAL_GM_LOCK(nal_data);
-- /*
-- * funny business to get rid
-- * of compiler warning
-- */
-- sbuf_long = (unsigned long) sbuf;
-- remote_ptr = (gm_remote_ptr_t)sbuf_long;
-- gm_get(nal_data->gm_port, remote_ptr, rbuf,
-- rlen, GM_LOW_PRIORITY, source_node,
-- GMNAL_GM_PORT,
-- gmnal_remote_get_callback, ltxd);
-- GMNAL_GM_UNLOCK(nal_data);
-- }
-- /*
-- * at the end of 1 iov element
-- */
-- sbuf+=rlen;
-- slen-=rlen;
-- riov++;
-- nriov--;
-- rbuf = riov->iov_base;
-- rlen = riov->iov_len;
-- } else if (rlen > slen) {
-- ncalls++;
-- if (do_copy) {
-- CDEBUG(D_INFO, "slen<rlen\n");
-- ltxd = gmnal_get_ltxd(nal_data);
-- ltxd->srxd = srxd;
-- GMNAL_GM_LOCK(nal_data);
-- sbuf_long = (unsigned long) sbuf;
-- remote_ptr = (gm_remote_ptr_t)sbuf_long;
-- gm_get(nal_data->gm_port, remote_ptr, rbuf,
-- slen, GM_LOW_PRIORITY, source_node,
-- GMNAL_GM_PORT,
-- gmnal_remote_get_callback, ltxd);
-- GMNAL_GM_UNLOCK(nal_data);
-- }
-- /*
-- * at end of siov element
-- */
-- rbuf+=slen;
-- rlen-=slen;
-- siov++;
-- sbuf = siov->iov_base;
-- slen = siov->iov_len;
-- } else {
-- ncalls++;
-- if (do_copy) {
-- CDEBUG(D_INFO, "rlen=slen\n");
-- ltxd = gmnal_get_ltxd(nal_data);
-- ltxd->srxd = srxd;
-- GMNAL_GM_LOCK(nal_data);
-- sbuf_long = (unsigned long) sbuf;
-- remote_ptr = (gm_remote_ptr_t)sbuf_long;
-- gm_get(nal_data->gm_port, remote_ptr, rbuf,
-- rlen, GM_LOW_PRIORITY, source_node,
-- GMNAL_GM_PORT,
-- gmnal_remote_get_callback, ltxd);
-- GMNAL_GM_UNLOCK(nal_data);
-- }
-- /*
-- * at end of siov and riov element
-- */
-- siov++;
-- sbuf = siov->iov_base;
-- slen = siov->iov_len;
-- riov++;
-- nriov--;
-- rbuf = riov->iov_base;
-- rlen = riov->iov_len;
-- }
--
-- } while (nriov);
-- return(ncalls);
--}
--
--
--/*
-- * The callback function that is invoked after each gm_get call completes.
-- * Multiple callbacks may be invoked for 1 transaction, only the final
-- * callback has work to do.
-- */
--void
--gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
-- gm_status_t status)
--{
--
-- gmnal_ltxd_t *ltxd = (gmnal_ltxd_t*)context;
-- gmnal_srxd_t *srxd = ltxd->srxd;
- nal_cb_t *nal_cb = srxd->nal_data->nal_cb;
- lib_nal_t *libnal = srxd->nal_data->libnal;
-- int lastone;
-- struct iovec *riov;
-- int nriov;
-- gmnal_data_t *nal_data;
--
-- CDEBUG(D_TRACE, "called for context [%p]\n", context);
--
-- if (status != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "reports error [%d][%s]\n", status,
-- gmnal_gm_error(status));
-- }
--
-- spin_lock(&srxd->callback_lock);
-- srxd->ncallbacks--;
-- srxd->callback_status |= status;
-- lastone = srxd->ncallbacks?0:1;
-- spin_unlock(&srxd->callback_lock);
-- nal_data = srxd->nal_data;
--
-- /*
-- * everyone returns a send token
-- */
-- gmnal_return_ltxd(nal_data, ltxd);
--
-- if (!lastone) {
-- CDEBUG(D_ERROR, "NOT final callback context[%p]\n", srxd);
-- return;
-- }
--
-- /*
-- * Let our client application proceed
-- */
-- CDEBUG(D_ERROR, "final callback context[%p]\n", srxd);
- lib_finalize(nal_cb, srxd, srxd->cookie, PTL_OK);
- lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
--
-- /*
-- * send an ack to the sender to let him know we got the data
-- */
-- gmnal_large_tx_ack(nal_data, srxd);
--
-- /*
-- * Unregister the memory that was used
-- * This is a very slow business (slower then register)
-- */
-- nriov = srxd->nriov;
-- riov = srxd->riov;
-- GMNAL_GM_LOCK(nal_data);
-- while (nriov--) {
-- CDEBUG(D_ERROR, "deregister memory [%p]\n", riov->iov_base);
-- if (gm_deregister_memory(srxd->nal_data->gm_port,
-- riov->iov_base, riov->iov_len)) {
-- CDEBUG(D_ERROR, "failed to deregister memory [%p]\n",
-- riov->iov_base);
-- }
-- riov++;
-- }
-- GMNAL_GM_UNLOCK(nal_data);
-- PORTAL_FREE(srxd->riov, sizeof(struct iovec)*nriov);
--
-- /*
-- * repost the receive buffer (return receive token)
-- */
-- GMNAL_GM_LOCK(nal_data);
-- gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
-- srxd->gmsize, GM_LOW_PRIORITY, 0);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- return;
--}
--
--
--/*
-- * Called on target node.
-- * After pulling data from a source node
-- * send an ack message to indicate the large transmit is complete.
-- */
--void
--gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
--{
--
-- gmnal_stxd_t *stxd;
-- gmnal_msghdr_t *msghdr;
-- void *buffer = NULL;
-- unsigned int local_nid;
-- gm_status_t gm_status = GM_SUCCESS;
--
-- CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd,
-- srxd->gm_source_node);
--
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_global_id_to_node_id(nal_data->gm_port,
-- srxd->gm_source_node, &local_nid);
-- GMNAL_GM_UNLOCK(nal_data);
-- if (gm_status != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "Failed to obtain local id\n");
-- return;
-- }
-- CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
--
-- stxd = gmnal_get_stxd(nal_data, 1);
-- CDEBUG(D_TRACE, "gmnal_large_tx_ack got stxd[%p]\n", stxd);
--
-- stxd->nal_data = nal_data;
-- stxd->type = GMNAL_LARGE_MESSAGE_ACK;
--
-- /*
-- * Copy gmnal_msg_hdr and portals header to the transmit buffer
-- * Then copy the data in
-- */
-- buffer = stxd->buffer;
-- msghdr = (gmnal_msghdr_t*)buffer;
--
-- /*
-- * Add in the address of the original stxd from the sender node
-- * so it knows which thread to notify.
-- */
-- msghdr->magic = GMNAL_MAGIC;
-- msghdr->type = GMNAL_LARGE_MESSAGE_ACK;
-- msghdr->sender_node_id = nal_data->gm_global_nid;
-- msghdr->stxd = srxd->source_stxd;
-- CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer);
--
-- CDEBUG(D_INFO, "sending\n");
-- stxd->msg_size= sizeof(gmnal_msghdr_t);
--
--
-- CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
-- "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
-- "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size,
-- stxd->msg_size, srxd->gm_source_node, local_nid, stxd);
-- GMNAL_GM_LOCK(nal_data);
-- stxd->gm_priority = GM_LOW_PRIORITY;
-- stxd->gm_target_node = local_nid;
-- gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer,
-- stxd->gm_size, stxd->msg_size,
-- GM_LOW_PRIORITY, local_nid,
-- gmnal_large_tx_ack_callback,
-- (void*)stxd);
--
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "gmnal_large_tx_ack :: done\n");
--
-- return;
--}
--
--
--/*
-- * A callback to indicate the small transmit operation is compete
-- * Check for errors and try to deal with them.
-- * Call lib_finalise to inform the client application that the
-- * send is complete and the memory can be reused.
-- * Return the stxd when finished with it (returns a send token)
-- */
--void
--gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context,
-- gm_status_t status)
--{
-- gmnal_stxd_t *stxd = (gmnal_stxd_t*)context;
-- gmnal_data_t *nal_data = (gmnal_data_t*)stxd->nal_data;
--
-- if (!stxd) {
-- CDEBUG(D_ERROR, "send completion event for unknown stxd\n");
-- return;
-- }
-- CDEBUG(D_TRACE, "send completion event for stxd [%p] status is [%d]\n",
-- stxd, status);
-- gmnal_return_stxd(stxd->nal_data, stxd);
--
-- GMNAL_GM_UNLOCK(nal_data);
-- return;
--}
--
--/*
-- * Indicates the large transmit operation is compete.
-- * Called on transmit side (means data has been pulled by receiver
-- * or failed).
-- * Call lib_finalise to inform the client application that the send
-- * is complete, deregister the memory and return the stxd.
-- * Finally, report the rx buffer that the ack message was delivered in.
-- */
--void
--gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
--{
- nal_cb_t *nal_cb = nal_data->nal_cb;
- lib_nal_t *libnal = nal_data->libnal;
-- gmnal_stxd_t *stxd = NULL;
-- gmnal_msghdr_t *msghdr = NULL;
-- void *buffer = NULL;
-- struct iovec *iov;
--
--
-- CDEBUG(D_TRACE, "gmnal_large_tx_ack_received buffer [%p]\n", buffer);
--
-- buffer = srxd->buffer;
-- msghdr = (gmnal_msghdr_t*)buffer;
-- stxd = msghdr->stxd;
--
-- CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
--
- lib_finalize(nal_cb, stxd, stxd->cookie, PTL_OK);
- lib_finalize(libnal, stxd, stxd->cookie, PTL_OK);
--
-- /*
-- * extract the iovec from the stxd, deregister the memory.
-- * free the space used to store the iovec
-- */
-- iov = stxd->iov;
-- while(stxd->niov--) {
-- CDEBUG(D_INFO, "deregister memory [%p] size ["LPSZ"]\n",
-- iov->iov_base, iov->iov_len);
-- GMNAL_GM_LOCK(nal_data);
-- gm_deregister_memory(nal_data->gm_port, iov->iov_base,
-- iov->iov_len);
-- GMNAL_GM_UNLOCK(nal_data);
-- iov++;
-- }
--
-- /*
-- * return the send token
-- * TO DO It is bad to hold onto the send token so long?
-- */
-- gmnal_return_stxd(nal_data, stxd);
--
--
-- /*
-- * requeue the receive buffer
-- */
-- gmnal_rx_requeue_buffer(nal_data, srxd);
--
--
-- return;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include "gmnal.h"
--
--
--int gmnal_small_msg_size = 525312;
--/*
-- * -1 indicates default value.
-- * This is 1 thread per cpu
-- * See start_kernel_threads
-- */
--int num_rx_threads = -1;
--int num_stxds = 5;
--int gm_port = 4;
-
- ptl_handle_ni_t kgmnal_ni;
-
--
--int
--gmnal_cmd(struct portals_cfg *pcfg, void *private)
--{
-- gmnal_data_t *nal_data = NULL;
-- char *name = NULL;
-- int nid = -2;
-- int gnid;
-- gm_status_t gm_status;
--
--
-- CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n",
-- pcfg->pcfg_command, private);
-- nal_data = (gmnal_data_t*)private;
-- switch(pcfg->pcfg_command) {
-- /*
-- * just reuse already defined GET_NID. Should define GMNAL version
-- */
-- case(GMNAL_IOC_GET_GNID):
--
-- PORTAL_ALLOC(name, pcfg->pcfg_plen1);
-- copy_from_user(name, pcfg->pcfg_pbuf1, pcfg->pcfg_plen1);
--
-- GMNAL_GM_LOCK(nal_data);
- nid = gm_host_name_to_node_id(nal_data->gm_port, name);
- //nid = gm_host_name_to_node_id(nal_data->gm_port, name);
- gm_status = gm_host_name_to_node_id_ex (nal_data->gm_port, 0, name, &nid);
-- GMNAL_GM_UNLOCK(nal_data);
- CDEBUG(D_INFO, "Local node id is [%d]\n", nid);
- if (gm_status != GM_SUCCESS) {
- CDEBUG(D_INFO, "gm_host_name_to_node_id_ex(...host %s) failed[%d]\n",
- name, gm_status);
- return (-1);
- } else
- CDEBUG(D_INFO, "Local node %s id is [%d]\n", name, nid);
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_node_id_to_global_id(nal_data->gm_port,
-- nid, &gnid);
-- GMNAL_GM_UNLOCK(nal_data);
-- if (gm_status != GM_SUCCESS) {
-- CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n",
-- gm_status);
-- return(-1);
-- }
-- CDEBUG(D_INFO, "Global node is is [%u][%x]\n", gnid, gnid);
-- copy_to_user(pcfg->pcfg_pbuf2, &gnid, pcfg->pcfg_plen2);
-- break;
-- default:
-- CDEBUG(D_INFO, "gmnal_cmd UNKNOWN[%d]\n", pcfg->pcfg_command);
-- pcfg->pcfg_nid2 = -1;
-- }
--
--
-- return(0);
--}
--
--
--static int __init
--gmnal_load(void)
--{
-- int status;
-- CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n");
-
--
--
-- CDEBUG(D_INFO, "Calling gmnal_init\n");
- status = PtlNIInit(gmnal_init, 32, 4, 0, &kgmnal_ni);
- status = gmnal_init();
-- if (status == PTL_OK) {
- CDEBUG(D_INFO, "Portals GMNAL initialised ok kgmnal_ni\n");
- CDEBUG(D_INFO, "Portals GMNAL initialised ok\n");
-- } else {
-- CDEBUG(D_INFO, "Portals GMNAL Failed to initialise\n");
- return(1);
- return(-ENODEV);
--
- }
-
- CDEBUG(D_INFO, "Calling kportal_nal_register\n");
- /*
- * global_nal_data is set by gmnal_init
- */
- if (kportal_nal_register(GMNAL, &gmnal_cmd, global_nal_data) != 0) {
- CDEBUG(D_INFO, "kportal_nal_register failed\n");
- return(1);
-- }
--
- CDEBUG(D_INFO, "Calling PORTAL_SYMBOL_REGISTER\n");
- PORTAL_SYMBOL_REGISTER(kgmnal_ni);
-- CDEBUG(D_INFO, "This is the end of the gmnal init routine");
--
--
-- return(0);
--}
--
--
--static void __exit
--gmnal_unload(void)
--{
-
- kportal_nal_unregister(GMNAL);
- PORTAL_SYMBOL_UNREGISTER(kgmnal_ni);
-- gmnal_fini();
- global_nal_data = NULL;
-- return;
--}
--
--
--module_init(gmnal_load);
--
--module_exit(gmnal_unload);
-
- EXPORT_SYMBOL(kgmnal_ni);
--
--MODULE_PARM(gmnal_small_msg_size, "i");
--MODULE_PARM(num_rx_threads, "i");
--MODULE_PARM(num_stxds, "i");
--MODULE_PARM(gm_port, "i");
--
--MODULE_AUTHOR("Morgan Doyle");
--
--MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM.");
--
--MODULE_LICENSE("GPL");
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--/*
-- * All utilities required by lgmanl
-- */
--
--#include "gmnal.h"
--
--/*
-- * Am I one of the gmnal rxthreads ?
-- */
--int
--gmnal_is_rxthread(gmnal_data_t *nal_data)
--{
-- int i;
-- for (i=0; i<num_rx_threads; i++) {
-- if (nal_data->rxthread_pid[i] == current->pid)
-- return(1);
-- }
-- return(0);
--}
--
--
--/*
-- * Allocate tx descriptors/tokens (large and small)
-- * allocate a number of small tx buffers and register with GM
-- * so they are wired and set up for DMA. This is a costly operation.
-- * Also allocate a corrosponding descriptor to keep track of
-- * the buffer.
-- * Put all small descriptors on singly linked list to be available to send
-- * function.
-- * Allocate the rest of the available tx tokens for large messages. These will be
-- * used to do gm_gets in gmnal_copyiov
-- */
--int
--gmnal_alloc_txd(gmnal_data_t *nal_data)
--{
-- int ntx= 0, nstx= 0, nrxt_stx= 0,
-- nltx= 0, i = 0;
-- gmnal_stxd_t *txd = NULL;
-- gmnal_ltxd_t *ltxd = NULL;
-- void *txbuffer = NULL;
--
-- CDEBUG(D_TRACE, "gmnal_alloc_small tx\n");
--
-- GMNAL_GM_LOCK(nal_data);
-- /*
-- * total number of transmit tokens
-- */
-- ntx = gm_num_send_tokens(nal_data->gm_port);
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "total number of send tokens available is [%d]\n", ntx);
--
-- /*
-- * allocate a number for small sends
-- * num_stxds from gmnal_module.c
-- */
-- nstx = num_stxds;
-- /*
-- * give that number plus 1 to the receive threads
-- */
-- nrxt_stx = nstx + 1;
--
-- /*
-- * give the rest for gm_gets
-- */
-- nltx = ntx - (nrxt_stx + nstx);
-- if (nltx < 1) {
-- CDEBUG(D_ERROR, "No tokens available for large messages\n");
-- return(GMNAL_STATUS_FAIL);
-- }
--
--
-- /*
-- * A semaphore is initialised with the
-- * number of transmit tokens available.
-- * To get a stxd, acquire the token semaphore.
-- * this decrements the available token count
-- * (if no tokens you block here, someone returning a
-- * stxd will release the semaphore and wake you)
-- * When token is obtained acquire the spinlock
-- * to manipulate the list
-- */
-- GMNAL_TXD_TOKEN_INIT(nal_data, nstx);
-- GMNAL_TXD_LOCK_INIT(nal_data);
-- GMNAL_RXT_TXD_TOKEN_INIT(nal_data, nrxt_stx);
-- GMNAL_RXT_TXD_LOCK_INIT(nal_data);
-- GMNAL_LTXD_TOKEN_INIT(nal_data, nltx);
-- GMNAL_LTXD_LOCK_INIT(nal_data);
--
-- for (i=0; i<=nstx; i++) {
-- PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
-- if (!txd) {
-- CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
-- return(GMNAL_STATUS_NOMEM);
-- }
-- GMNAL_GM_LOCK(nal_data);
-- txbuffer = gm_dma_malloc(nal_data->gm_port,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- GMNAL_GM_UNLOCK(nal_data);
-- if (!txbuffer) {
-- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
-- " size [%d]\n", i,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
-- return(GMNAL_STATUS_FAIL);
-- }
-- txd->buffer = txbuffer;
-- txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data);
-- txd->gm_size = gm_min_size_for_length(txd->buffer_size);
-- txd->nal_data = (struct _gmnal_data_t*)nal_data;
-- txd->rxt = 0;
--
-- txd->next = nal_data->stxd;
-- nal_data->stxd = txd;
-- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
-- "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-- }
--
-- for (i=0; i<=nrxt_stx; i++) {
-- PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
-- if (!txd) {
-- CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
-- return(GMNAL_STATUS_NOMEM);
-- }
-- GMNAL_GM_LOCK(nal_data);
-- txbuffer = gm_dma_malloc(nal_data->gm_port,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- GMNAL_GM_UNLOCK(nal_data);
-- if (!txbuffer) {
-- CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
-- " size [%d]\n", i,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
-- return(GMNAL_STATUS_FAIL);
-- }
-- txd->buffer = txbuffer;
-- txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data);
-- txd->gm_size = gm_min_size_for_length(txd->buffer_size);
-- txd->nal_data = (struct _gmnal_data_t*)nal_data;
-- txd->rxt = 1;
--
-- txd->next = nal_data->rxt_stxd;
-- nal_data->rxt_stxd = txd;
-- CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
-- "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-- }
--
-- /*
-- * string together large tokens
-- */
-- for (i=0; i<=nltx ; i++) {
-- PORTAL_ALLOC(ltxd, sizeof(gmnal_ltxd_t));
-- ltxd->next = nal_data->ltxd;
-- nal_data->ltxd = ltxd;
-- }
-- return(GMNAL_STATUS_OK);
--}
--
--/* Free the list of wired and gm_registered small tx buffers and
-- * the tx descriptors that go along with them.
-- */
--void
--gmnal_free_txd(gmnal_data_t *nal_data)
--{
-- gmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL;
-- gmnal_ltxd_t *ltxd = NULL, *_ltxd = NULL;
--
-- CDEBUG(D_TRACE, "gmnal_free_small tx\n");
--
-- while(txd) {
-- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
-- "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-- _txd = txd;
-- txd = txd->next;
-- GMNAL_GM_LOCK(nal_data);
-- gm_dma_free(nal_data->gm_port, _txd->buffer);
-- GMNAL_GM_UNLOCK(nal_data);
-- PORTAL_FREE(_txd, sizeof(gmnal_stxd_t));
-- }
-- txd = nal_data->rxt_stxd;
-- while(txd) {
-- CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
-- "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-- _txd = txd;
-- txd = txd->next;
-- GMNAL_GM_LOCK(nal_data);
-- gm_dma_free(nal_data->gm_port, _txd->buffer);
-- GMNAL_GM_UNLOCK(nal_data);
-- PORTAL_FREE(_txd, sizeof(gmnal_stxd_t));
-- }
-- ltxd = nal_data->ltxd;
-- while(txd) {
-- _ltxd = ltxd;
-- ltxd = ltxd->next;
-- PORTAL_FREE(_ltxd, sizeof(gmnal_ltxd_t));
-- }
--
-- return;
--}
--
--
--/*
-- * Get a txd from the list
-- * This get us a wired and gm_registered small tx buffer.
-- * This implicitly gets us a send token also.
-- */
--gmnal_stxd_t *
--gmnal_get_stxd(gmnal_data_t *nal_data, int block)
--{
--
-- gmnal_stxd_t *txd = NULL;
-- pid_t pid = current->pid;
--
--
-- CDEBUG(D_TRACE, "gmnal_get_stxd nal_data [%p] block[%d] pid [%d]\n",
-- nal_data, block, pid);
--
-- if (gmnal_is_rxthread(nal_data)) {
-- CDEBUG(D_INFO, "RXTHREAD Attempting to get token\n");
-- GMNAL_RXT_TXD_GETTOKEN(nal_data);
-- GMNAL_RXT_TXD_LOCK(nal_data);
-- txd = nal_data->rxt_stxd;
-- nal_data->rxt_stxd = txd->next;
-- GMNAL_RXT_TXD_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "RXTHREAD got [%p], head is [%p]\n",
-- txd, nal_data->rxt_stxd);
-- txd->kniov = 0;
-- txd->rxt = 1;
-- } else {
-- if (block) {
-- CDEBUG(D_INFO, "Attempting to get token\n");
-- GMNAL_TXD_GETTOKEN(nal_data);
-- CDEBUG(D_PORTALS, "Got token\n");
-- } else {
-- if (GMNAL_TXD_TRYGETTOKEN(nal_data)) {
-- CDEBUG(D_ERROR, "can't get token\n");
-- return(NULL);
-- }
-- }
-- GMNAL_TXD_LOCK(nal_data);
-- txd = nal_data->stxd;
-- nal_data->stxd = txd->next;
-- GMNAL_TXD_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "got [%p], head is [%p]\n", txd,
-- nal_data->stxd);
-- txd->kniov = 0;
-- } /* general txd get */
-- return(txd);
--}
--
--/*
-- * Return a txd to the list
-- */
--void
--gmnal_return_stxd(gmnal_data_t *nal_data, gmnal_stxd_t *txd)
--{
-- CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data,
-- txd, txd->rxt);
--
-- /*
-- * this transmit descriptor is
-- * for the rxthread
-- */
-- if (txd->rxt) {
-- GMNAL_RXT_TXD_LOCK(nal_data);
-- txd->next = nal_data->rxt_stxd;
-- nal_data->rxt_stxd = txd;
-- GMNAL_RXT_TXD_UNLOCK(nal_data);
-- GMNAL_RXT_TXD_RETURNTOKEN(nal_data);
-- CDEBUG(D_INFO, "Returned stxd to rxthread list\n");
-- } else {
-- GMNAL_TXD_LOCK(nal_data);
-- txd->next = nal_data->stxd;
-- nal_data->stxd = txd;
-- GMNAL_TXD_UNLOCK(nal_data);
-- GMNAL_TXD_RETURNTOKEN(nal_data);
-- CDEBUG(D_INFO, "Returned stxd to general list\n");
-- }
-- return;
--}
--
--
--/*
-- * Get a large transmit descriptor from the free list
-- * This implicitly gets us a transmit token .
-- * always wait for one.
-- */
--gmnal_ltxd_t *
--gmnal_get_ltxd(gmnal_data_t *nal_data)
--{
--
-- gmnal_ltxd_t *ltxd = NULL;
--
-- CDEBUG(D_TRACE, "nal_data [%p]\n", nal_data);
--
-- GMNAL_LTXD_GETTOKEN(nal_data);
-- GMNAL_LTXD_LOCK(nal_data);
-- ltxd = nal_data->ltxd;
-- nal_data->ltxd = ltxd->next;
-- GMNAL_LTXD_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "got [%p], head is [%p]\n", ltxd, nal_data->ltxd);
-- return(ltxd);
--}
--
--/*
-- * Return an ltxd to the list
-- */
--void
--gmnal_return_ltxd(gmnal_data_t *nal_data, gmnal_ltxd_t *ltxd)
--{
-- CDEBUG(D_TRACE, "nal_data [%p], ltxd[%p]\n", nal_data, ltxd);
--
-- GMNAL_LTXD_LOCK(nal_data);
-- ltxd->next = nal_data->ltxd;
-- nal_data->ltxd = ltxd;
-- GMNAL_LTXD_UNLOCK(nal_data);
-- GMNAL_LTXD_RETURNTOKEN(nal_data);
-- return;
--}
--/*
-- * allocate a number of small rx buffers and register with GM
-- * so they are wired and set up for DMA. This is a costly operation.
-- * Also allocate a corrosponding descriptor to keep track of
-- * the buffer.
-- * Put all descriptors on singly linked list to be available to
-- * receive thread.
-- */
--int
--gmnal_alloc_srxd(gmnal_data_t *nal_data)
--{
-- int nrx = 0, nsrx = 0, i = 0;
-- gmnal_srxd_t *rxd = NULL;
-- void *rxbuffer = NULL;
--
-- CDEBUG(D_TRACE, "gmnal_alloc_small rx\n");
--
-- GMNAL_GM_LOCK(nal_data);
-- nrx = gm_num_receive_tokens(nal_data->gm_port);
-- GMNAL_GM_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n",
-- nrx);
--
-- nsrx = nrx/2;
-- nsrx = 12;
-- /*
-- * make the number of rxds twice our total
-- * number of stxds plus 1
-- */
-- nsrx = num_stxds*2 + 2;
--
-- CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n",
-- nsrx);
--
--
-- GMNAL_GM_LOCK(nal_data);
-- nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs,
-- gm_hash_hash_ptr, 0, 0, nsrx, 0);
-- GMNAL_GM_UNLOCK(nal_data);
-- if (!nal_data->srxd_hash) {
-- CDEBUG(D_ERROR, "Failed to create hash table\n");
-- return(GMNAL_STATUS_NOMEM);
-- }
--
-- GMNAL_RXD_TOKEN_INIT(nal_data, nsrx);
-- GMNAL_RXD_LOCK_INIT(nal_data);
--
-- for (i=0; i<=nsrx; i++) {
-- PORTAL_ALLOC(rxd, sizeof(gmnal_srxd_t));
-- if (!rxd) {
-- CDEBUG(D_ERROR, "Failed to malloc rxd [%d]\n", i);
-- return(GMNAL_STATUS_NOMEM);
-- }
--#if 0
-- PORTAL_ALLOC(rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
-- if (!rxbuffer) {
-- CDEBUG(D_ERROR, "Failed to malloc rxbuffer [%d], "
-- "size [%d]\n", i,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
-- return(GMNAL_STATUS_FAIL);
-- }
-- CDEBUG(D_NET, "Calling gm_register_memory with port [%p] "
-- "rxbuffer [%p], size [%d]\n", nal_data->gm_port,
-- rxbuffer, GMNAL_SMALL_MSG_SIZE(nal_data));
-- GMNAL_GM_LOCK(nal_data);
-- gm_status = gm_register_memory(nal_data->gm_port, rxbuffer,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- GMNAL_GM_UNLOCK(nal_data);
-- if (gm_status != GM_SUCCESS) {
-- CDEBUG(D_ERROR, "gm_register_memory failed buffer [%p],"
-- " index [%d]\n", rxbuffer, i);
-- switch(gm_status) {
-- case(GM_FAILURE):
-- CDEBUG(D_ERROR, "GM_FAILURE\n");
-- break;
-- case(GM_PERMISSION_DENIED):
-- CDEBUG(D_ERROR, "PERMISSION_DENIED\n");
-- break;
-- case(GM_INVALID_PARAMETER):
-- CDEBUG(D_ERROR, "INVALID_PARAMETER\n");
-- break;
-- default:
-- CDEBUG(D_ERROR, "Unknown error[%d]\n",
-- gm_status);
-- break;
--
-- }
-- return(GMNAL_STATUS_FAIL);
-- }
--#else
-- GMNAL_GM_LOCK(nal_data);
-- rxbuffer = gm_dma_malloc(nal_data->gm_port,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- GMNAL_GM_UNLOCK(nal_data);
-- if (!rxbuffer) {
-- CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
-- " size [%d]\n", i,
-- GMNAL_SMALL_MSG_SIZE(nal_data));
-- PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
-- return(GMNAL_STATUS_FAIL);
-- }
--#endif
--
-- rxd->buffer = rxbuffer;
-- rxd->size = GMNAL_SMALL_MSG_SIZE(nal_data);
-- rxd->gmsize = gm_min_size_for_length(rxd->size);
--
-- if (gm_hash_insert(nal_data->srxd_hash,
-- (void*)rxbuffer, (void*)rxd)) {
--
-- CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
-- "for rxbuffer[%p]\n", rxd, rxbuffer);
-- return(GMNAL_STATUS_FAIL);
-- }
--
-- rxd->next = nal_data->srxd;
-- nal_data->srxd = rxd;
-- CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
-- "size [%d]\n", rxd, rxd->buffer, rxd->size);
-- }
--
-- return(GMNAL_STATUS_OK);
--}
--
--
--
--/* Free the list of wired and gm_registered small rx buffers and the
-- * rx descriptors that go along with them.
-- */
--void
--gmnal_free_srxd(gmnal_data_t *nal_data)
--{
-- gmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL;
--
-- CDEBUG(D_TRACE, "gmnal_free_small rx\n");
--
-- while(rxd) {
-- CDEBUG(D_INFO, "Freeing rxd [%p] buffer [%p], size [%d]\n",
-- rxd, rxd->buffer, rxd->size);
-- _rxd = rxd;
-- rxd = rxd->next;
--
--#if 0
-- GMNAL_GM_LOCK(nal_data);
-- gm_deregister_memory(nal_data->gm_port, _rxd->buffer,
-- _rxd->size);
-- GMNAL_GM_UNLOCK(nal_data);
-- PORTAL_FREE(_rxd->buffer, GMNAL_SMALL_RXBUFFER_SIZE);
--#else
-- GMNAL_GM_LOCK(nal_data);
-- gm_dma_free(nal_data->gm_port, _rxd->buffer);
-- GMNAL_GM_UNLOCK(nal_data);
--#endif
-- PORTAL_FREE(_rxd, sizeof(gmnal_srxd_t));
-- }
-- return;
--}
--
--
--/*
-- * Get a rxd from the free list
-- * This get us a wired and gm_registered small rx buffer.
-- * This implicitly gets us a receive token also.
-- */
--gmnal_srxd_t *
--gmnal_get_srxd(gmnal_data_t *nal_data, int block)
--{
--
-- gmnal_srxd_t *rxd = NULL;
-- CDEBUG(D_TRACE, "nal_data [%p] block [%d]\n", nal_data, block);
--
-- if (block) {
-- GMNAL_RXD_GETTOKEN(nal_data);
-- } else {
-- if (GMNAL_RXD_TRYGETTOKEN(nal_data)) {
-- CDEBUG(D_INFO, "gmnal_get_srxd Can't get token\n");
-- return(NULL);
-- }
-- }
-- GMNAL_RXD_LOCK(nal_data);
-- rxd = nal_data->srxd;
-- if (rxd)
-- nal_data->srxd = rxd->next;
-- GMNAL_RXD_UNLOCK(nal_data);
-- CDEBUG(D_INFO, "got [%p], head is [%p]\n", rxd, nal_data->srxd);
-- return(rxd);
--}
--
--/*
-- * Return an rxd to the list
-- */
--void
--gmnal_return_srxd(gmnal_data_t *nal_data, gmnal_srxd_t *rxd)
--{
-- CDEBUG(D_TRACE, "nal_data [%p], rxd[%p]\n", nal_data, rxd);
--
-- GMNAL_RXD_LOCK(nal_data);
-- rxd->next = nal_data->srxd;
-- nal_data->srxd = rxd;
-- GMNAL_RXD_UNLOCK(nal_data);
-- GMNAL_RXD_RETURNTOKEN(nal_data);
-- return;
--}
--
--/*
-- * Given a pointer to a srxd find
-- * the relevant descriptor for it
-- * This is done by searching a hash
-- * list that is created when the srxd's
-- * are created
-- */
--gmnal_srxd_t *
--gmnal_rxbuffer_to_srxd(gmnal_data_t *nal_data, void *rxbuffer)
--{
-- gmnal_srxd_t *srxd = NULL;
-- CDEBUG(D_TRACE, "nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer);
-- srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer);
-- CDEBUG(D_INFO, "srxd is [%p]\n", srxd);
-- return(srxd);
--}
--
--
--void
--gmnal_stop_rxthread(gmnal_data_t *nal_data)
--{
-- int delay = 30;
--
--
--
-- CDEBUG(D_TRACE, "Attempting to stop rxthread nal_data [%p]\n",
-- nal_data);
--
-- nal_data->rxthread_stop_flag = GMNAL_THREAD_STOP;
--
-- gmnal_remove_rxtwe(nal_data);
-- /*
-- * kick the thread
-- */
-- up(&nal_data->rxtwe_wait);
--
-- while(nal_data->rxthread_flag != GMNAL_THREAD_RESET && delay--) {
-- CDEBUG(D_INFO, "gmnal_stop_rxthread sleeping\n");
-- gmnal_yield(1);
-- up(&nal_data->rxtwe_wait);
-- }
--
-- if (nal_data->rxthread_flag != GMNAL_THREAD_RESET) {
-- CDEBUG(D_ERROR, "I don't know how to wake the thread\n");
-- } else {
-- CDEBUG(D_INFO, "rx thread seems to have stopped\n");
-- }
--}
--
--void
--gmnal_stop_ctthread(gmnal_data_t *nal_data)
--{
-- int delay = 15;
--
--
--
-- CDEBUG(D_TRACE, "Attempting to stop ctthread nal_data [%p]\n",
-- nal_data);
--
-- nal_data->ctthread_flag = GMNAL_THREAD_STOP;
-- GMNAL_GM_LOCK(nal_data);
-- gm_set_alarm(nal_data->gm_port, &nal_data->ctthread_alarm, 10,
-- NULL, NULL);
-- GMNAL_GM_UNLOCK(nal_data);
--
-- while(nal_data->ctthread_flag == GMNAL_THREAD_STOP && delay--) {
-- CDEBUG(D_INFO, "gmnal_stop_ctthread sleeping\n");
-- gmnal_yield(1);
-- }
--
-- if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) {
-- CDEBUG(D_ERROR, "I DON'T KNOW HOW TO WAKE THE THREAD\n");
-- } else {
-- CDEBUG(D_INFO, "CT THREAD SEEMS TO HAVE STOPPED\n");
-- }
--}
--
--
--
--char *
--gmnal_gm_error(gm_status_t status)
--{
-- return(gm_strerror(status));
--
-- switch(status) {
-- case(GM_SUCCESS):
-- return("SUCCESS");
-- case(GM_FAILURE):
-- return("FAILURE");
-- case(GM_INPUT_BUFFER_TOO_SMALL):
-- return("INPUT_BUFFER_TOO_SMALL");
-- case(GM_OUTPUT_BUFFER_TOO_SMALL):
-- return("OUTPUT_BUFFER_TOO_SMALL");
-- case(GM_TRY_AGAIN ):
-- return("TRY_AGAIN");
-- case(GM_BUSY):
-- return("BUSY");
-- case(GM_MEMORY_FAULT):
-- return("MEMORY_FAULT");
-- case(GM_INTERRUPTED):
-- return("INTERRUPTED");
-- case(GM_INVALID_PARAMETER):
-- return("INVALID_PARAMETER");
-- case(GM_OUT_OF_MEMORY):
-- return("OUT_OF_MEMORY");
-- case(GM_INVALID_COMMAND):
-- return("INVALID_COMMAND");
-- case(GM_PERMISSION_DENIED):
-- return("PERMISSION_DENIED");
-- case(GM_INTERNAL_ERROR):
-- return("INTERNAL_ERROR");
-- case(GM_UNATTACHED):
-- return("UNATTACHED");
-- case(GM_UNSUPPORTED_DEVICE):
-- return("UNSUPPORTED_DEVICE");
-- case(GM_SEND_TIMED_OUT):
-- return("GM_SEND_TIMEDOUT");
-- case(GM_SEND_REJECTED):
-- return("GM_SEND_REJECTED");
-- case(GM_SEND_TARGET_PORT_CLOSED):
-- return("GM_SEND_TARGET_PORT_CLOSED");
-- case(GM_SEND_TARGET_NODE_UNREACHABLE):
-- return("GM_SEND_TARGET_NODE_UNREACHABLE");
-- case(GM_SEND_DROPPED):
-- return("GM_SEND_DROPPED");
-- case(GM_SEND_PORT_CLOSED):
-- return("GM_SEND_PORT_CLOSED");
-- case(GM_NODE_ID_NOT_YET_SET):
-- return("GM_NODE_ID_NOT_YET_SET");
-- case(GM_STILL_SHUTTING_DOWN):
-- return("GM_STILL_SHUTTING_DOWN");
-- case(GM_CLONE_BUSY):
-- return("GM_CLONE_BUSY");
-- case(GM_NO_SUCH_DEVICE):
-- return("GM_NO_SUCH_DEVICE");
-- case(GM_ABORTED):
-- return("GM_ABORTED");
-- case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-- return("GM_INCOMPATIBLE_LIB_AND_DRIVER");
-- case(GM_UNTRANSLATED_SYSTEM_ERROR):
-- return("GM_UNTRANSLATED_SYSTEM_ERROR");
-- case(GM_ACCESS_DENIED):
-- return("GM_ACCESS_DENIED");
--
--
--/*
-- * These ones are in the docs but aren't in the header file
-- case(GM_DEV_NOT_FOUND):
-- return("GM_DEV_NOT_FOUND");
-- case(GM_INVALID_PORT_NUMBER):
-- return("GM_INVALID_PORT_NUMBER");
-- case(GM_UC_ERROR):
-- return("GM_US_ERROR");
-- case(GM_PAGE_TABLE_FULL):
-- return("GM_PAGE_TABLE_FULL");
-- case(GM_MINOR_OVERFLOW):
-- return("GM_MINOR_OVERFLOW");
-- case(GM_SEND_ORPHANED):
-- return("GM_SEND_ORPHANED");
-- case(GM_HARDWARE_FAULT):
-- return("GM_HARDWARE_FAULT");
-- case(GM_DATA_CORRUPTED):
-- return("GM_DATA_CORRUPTED");
-- case(GM_TIMED_OUT):
-- return("GM_TIMED_OUT");
-- case(GM_USER_ERROR):
-- return("GM_USER_ERROR");
-- case(GM_NO_MATCH):
-- return("GM_NOMATCH");
-- case(GM_NOT_SUPPORTED_IN_KERNEL):
-- return("GM_NOT_SUPPORTED_IN_KERNEL");
-- case(GM_NOT_SUPPORTED_ON_ARCH):
-- return("GM_NOT_SUPPORTED_ON_ARCH");
-- case(GM_PTE_REF_CNT_OVERFLOW):
-- return("GM_PTR_REF_CNT_OVERFLOW");
-- case(GM_NO_DRIVER_SUPPORT):
-- return("GM_NO_DRIVER_SUPPORT");
-- case(GM_FIRMWARE_NOT_RUNNING):
-- return("GM_FIRMWARE_NOT_RUNNING");
--
-- * These ones are in the docs but aren't in the header file
-- */
-- default:
-- return("UNKNOWN GM ERROR CODE");
-- }
--}
--
--
--/*
-- * Translate the type of a GM receive event into a printable name for
-- * debug output.
-- *
-- * ev: the event; its type is extracted with GM_RECV_EVENT_TYPE().
-- *
-- * Returns a pointer to a constant string.  Event types without a case
-- * below yield "Unknown Recv event".
-- *
-- * The name is generated from the case label itself by stringification,
-- * so the text returned can never drift from the identifier it describes
-- * (the hand-written strings previously contained several misspellings
-- * and dropped the leading underscore of some private event names).
-- *
-- * _GM_PUT_NOTIFICATION_EVENT, GM_FREE_SEND_TOKEN_EVENT and
-- * GM_FREE_HIGH_SEND_TOKEN_EVENT were already disabled in this switch and
-- * remain unhandled; they are reported as unknown.
-- */
--#define GMNAL_RXEVENT_NAME(name)        case name: return(#name)
--
--char *
--gmnal_rxevent(gm_recv_event_t *ev)
--{
--        short   event;
--
--        event = GM_RECV_EVENT_TYPE(ev);
--        switch (event) {
--        GMNAL_RXEVENT_NAME(GM_NO_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_SENDS_FAILED_EVENT);
--        GMNAL_RXEVENT_NAME(GM_ALARM_EVENT);
--        GMNAL_RXEVENT_NAME(GM_SENT_EVENT);
--        GMNAL_RXEVENT_NAME(_GM_SLEEP_EVENT);
--        GMNAL_RXEVENT_NAME(GM_RAW_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_BAD_SEND_DETECTED_EVENT);
--        GMNAL_RXEVENT_NAME(GM_SEND_TOKEN_VIOLATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_RECV_TOKEN_VIOLATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_BAD_RECV_TOKEN_EVENT);
--        GMNAL_RXEVENT_NAME(GM_ALARM_VIOLATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_HIGH_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_HIGH_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_FAST_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_FAST_HIGH_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_FAST_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_FAST_HIGH_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_REJECTED_SEND_EVENT);
--        GMNAL_RXEVENT_NAME(GM_ORPHANED_SEND_EVENT);
--        GMNAL_RXEVENT_NAME(GM_BAD_RESEND_DETECTED_EVENT);
--        GMNAL_RXEVENT_NAME(GM_DROPPED_SEND_EVENT);
--        GMNAL_RXEVENT_NAME(GM_BAD_SEND_VMA_EVENT);
--        GMNAL_RXEVENT_NAME(GM_BAD_RECV_VMA_EVENT);
--        GMNAL_RXEVENT_NAME(_GM_FLUSHED_ALARM_EVENT);
--        GMNAL_RXEVENT_NAME(GM_SENT_TOKENS_EVENT);
--        GMNAL_RXEVENT_NAME(GM_IGNORE_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_ETHERNET_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_NO_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_SENDS_FAILED_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_ALARM_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_SENT_EVENT);
--        GMNAL_RXEVENT_NAME(_GM_NEW_SLEEP_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_RAW_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_BAD_SEND_DETECTED_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_SEND_TOKEN_VIOLATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_RECV_TOKEN_VIOLATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_BAD_RECV_TOKEN_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_ALARM_VIOLATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_HIGH_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_HIGH_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_FAST_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_FAST_HIGH_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_FAST_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_FAST_HIGH_PEER_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_REJECTED_SEND_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_ORPHANED_SEND_EVENT);
--        GMNAL_RXEVENT_NAME(_GM_NEW_PUT_NOTIFICATION_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_FREE_SEND_TOKEN_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_BAD_RESEND_DETECTED_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_DROPPED_SEND_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_BAD_SEND_VMA_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_BAD_RECV_VMA_EVENT);
--        GMNAL_RXEVENT_NAME(_GM_NEW_FLUSHED_ALARM_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_SENT_TOKENS_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_IGNORE_RECV_EVENT);
--        GMNAL_RXEVENT_NAME(GM_NEW_ETHERNET_RECV_EVENT);
--        default:
--                return("Unknown Recv event");
--        }
--}
--
--#undef GMNAL_RXEVENT_NAME
--
--
--/*
-- * Give up the CPU: mark the current task TASK_INTERRUPTIBLE and call
-- * schedule_timeout() with 'delay' as the timeout.
-- */
--void gmnal_yield(int delay)
--{
--        set_current_state(TASK_INTERRUPTIBLE);
--
--        schedule_timeout(delay);
--}
--
--/*
-- * Decide whether a payload of 'len' bytes, together with a ptl_hdr_t and
-- * a gmnal_msghdr_t, is strictly smaller than GMNAL_SMALL_MSG_SIZE(nal_data).
-- *
-- * niov and iov are accepted but not examined.
-- *
-- * Returns 1 for a small message, 0 otherwise.
-- */
--int
--gmnal_is_small_msg(gmnal_data_t *nal_data, int niov, struct iovec *iov,
--                   int len)
--{
--        CDEBUG(D_TRACE, "len [%d] limit[%d]\n", len,
--               GMNAL_SMALL_MSG_SIZE(nal_data));
--
--        if ((len + sizeof(ptl_hdr_t) + sizeof(gmnal_msghdr_t))
--            >= GMNAL_SMALL_MSG_SIZE(nal_data)) {
--                CDEBUG(D_ERROR, "No, not small message\n");
--                /* the iov could still consist of many small fragments */
--                return(0);
--        }
--
--        CDEBUG(D_INFO, "Yep, small message\n");
--        return(1);
--}
--
--/*
-- * Queue a receive for the rx threads.
-- *
-- * Everything needed from the receive event is copied out here, and all
-- * byte-order conversion is done here, because this has to happen before
-- * the next call to gm_receive.  The new work entry is appended to the
-- * rxtwe list under rxtwe_lock, then rxtwe_wait is upped so that an rx
-- * thread can pick the entry up and complete the receive.
-- *
-- * Returns GMNAL_STATUS_OK, or GMNAL_STATUS_FAIL if the work entry could
-- * not be allocated.
-- */
--int
--gmnal_add_rxtwe(gmnal_data_t *nal_data, gm_recv_t *recv)
--{
--        gmnal_rxtwe_t   *we = NULL;
--
--        CDEBUG(D_NET, "adding entry to list\n");
--
--        PORTAL_ALLOC(we, sizeof(gmnal_rxtwe_t));
--        if (!we) {
--                CDEBUG(D_ERROR, "failed to malloc\n");
--                return(GMNAL_STATUS_FAIL);
--        }
--        we->buffer = gm_ntohp(recv->buffer);
--        we->snode = (int)gm_ntoh_u16(recv->sender_node_id);
--        we->sport = (int)gm_ntoh_u8(recv->sender_port_id);
--        we->type = (int)gm_ntoh_u8(recv->type);
--        we->length = (int)gm_ntohl(recv->length);
--        /* The consumers walk the list until ->next is NULL; terminate the
--         * entry explicitly rather than relying on how PORTAL_ALLOC fills
--         * the memory it returns. */
--        we->next = NULL;
--
--        spin_lock(&nal_data->rxtwe_lock);
--        if (nal_data->rxtwe_tail)
--                nal_data->rxtwe_tail->next = we;
--        else
--                nal_data->rxtwe_head = we;      /* list was empty */
--        nal_data->rxtwe_tail = we;
--        spin_unlock(&nal_data->rxtwe_lock);
--
--        up(&nal_data->rxtwe_wait);
--        return(GMNAL_STATUS_OK);
--}
--
--/*
-- * Discard every entry still queued on the rx work list.
-- *
-- * The whole list is detached while rxtwe_lock is held, so the head is
-- * never sampled, and head/tail are never reset, outside the lock.  The
-- * detached entries are then private to this function and are freed
-- * after the lock has been dropped.
-- */
--void
--gmnal_remove_rxtwe(gmnal_data_t *nal_data)
--{
--        gmnal_rxtwe_t   *we;
--        gmnal_rxtwe_t   *next;
--
--        CDEBUG(D_NET, "removing all work list entries\n");
--
--        spin_lock(&nal_data->rxtwe_lock);
--        CDEBUG(D_NET, "Got lock\n");
--        we = nal_data->rxtwe_head;
--        nal_data->rxtwe_head = NULL;
--        nal_data->rxtwe_tail = NULL;
--        spin_unlock(&nal_data->rxtwe_lock);
--
--        while (we) {
--                next = we->next;
--                PORTAL_FREE(we, sizeof(gmnal_rxtwe_t));
--                we = next;
--        }
--}
--
--/*
-- * Take the next work entry off the rx work list, sleeping on rxtwe_wait
-- * until one is available.
-- *
-- * Returns the dequeued entry, or NULL once rxthread_stop_flag is seen to
-- * be GMNAL_THREAD_STOP after a wakeup.  Entries still queued at that
-- * point are not freed here.
-- */
--gmnal_rxtwe_t *
--gmnal_get_rxtwe(gmnal_data_t *nal_data)
--{
--        gmnal_rxtwe_t   *we;
--
--        CDEBUG(D_NET, "Getting entry to list\n");
--
--        for (;;) {
--                down(&nal_data->rxtwe_wait);
--
--                /* time to stop; somebody else has to free the work entries */
--                if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP)
--                        return(NULL);
--
--                spin_lock(&nal_data->rxtwe_lock);
--                we = nal_data->rxtwe_head;
--                if (we) {
--                        CDEBUG(D_INFO, "Got a work entry\n");
--                        nal_data->rxtwe_head = we->next;
--                        /* removed the last entry: the list is empty again */
--                        if (!nal_data->rxtwe_head)
--                                nal_data->rxtwe_tail = NULL;
--                } else {
--                        CDEBUG(D_WARNING, "woken but no work\n");
--                }
--                spin_unlock(&nal_data->rxtwe_lock);
--
--                if (we)
--                        break;
--        }
--
--        CDEBUG(D_INFO, "Returning we[%p]\n", we);
--        return(we);
--}
--
--
--/*
-- * Start the caretaker thread and a number of receiver threads.
-- * The caretaker thread gets events from the gm library.
-- * It passes receive events to the receiver threads via a work list.
-- * It processes other events itself in gm_unknown. These will be
-- * callback events or sleeps.
-- *
-- * Returns GMNAL_STATUS_OK once the receiver threads have reported
-- * GMNAL_RXTHREADS_STARTED, or GMNAL_STATUS_FAIL if any thread could not
-- * be created.
-- */
--int
--gmnal_start_kernel_threads(gmnal_data_t *nal_data)
--{
--        int     threads = 0;
--
--        /*
--         * Set up the work list, its lock and its semaphore before any
--         * thread exists.  The caretaker thread is the producer for this
--         * list, so it must not be able to run while these are still
--         * uninitialised, and nothing it queues may be wiped afterwards.
--         */
--        for (threads = 0; threads < NRXTHREADS; threads++)
--                nal_data->rxthread_pid[threads] = -1;
--        spin_lock_init(&nal_data->rxtwe_lock);
--        spin_lock_init(&nal_data->rxthread_flag_lock);
--        sema_init(&nal_data->rxtwe_wait, 0);
--        nal_data->rxtwe_head = NULL;
--        nal_data->rxtwe_tail = NULL;
--
--        /*
--         * the alarm is used to wake the caretaker thread from
--         * gm_unknown call (sleeping) to exit it.
--         */
--        CDEBUG(D_NET, "Initializing caretaker thread alarm and flag\n");
--        gm_initialize_alarm(&nal_data->ctthread_alarm);
--        nal_data->ctthread_flag = GMNAL_THREAD_RESET;
--
--        CDEBUG(D_INFO, "Starting caretaker thread\n");
--        nal_data->ctthread_pid =
--                kernel_thread(gmnal_ct_thread, (void*)nal_data, 0);
--        if (nal_data->ctthread_pid <= 0) {
--                CDEBUG(D_ERROR, "Caretaker thread failed to start\n");
--                return(GMNAL_STATUS_FAIL);
--        }
--
--        /*
--         * NOTE(review): this waits on rxthread_flag although it is meant
--         * to wait for the caretaker thread; ctthread_flag looks like the
--         * intended field.  Left as found because the value the caretaker
--         * thread stores on start-up is not visible here -- confirm.
--         */
--        while (nal_data->rxthread_flag != GMNAL_THREAD_RESET) {
--                gmnal_yield(1);
--                CDEBUG(D_INFO, "Waiting for caretaker thread signs of life\n");
--        }
--
--        CDEBUG(D_INFO, "caretaker thread has started\n");
--
--        /*
--         * Now start a number of receiver threads
--         * these threads get work to do from the caretaker (ct) thread
--         */
--        nal_data->rxthread_flag = GMNAL_THREAD_RESET;
--        nal_data->rxthread_stop_flag = GMNAL_THREAD_RESET;
--
--        /*
--         * If the default number of receive threads isn't modified at
--         * load time (-1), start one thread per cpu.  Any other
--         * non-positive value would start no thread at all and leave the
--         * wait loop below spinning forever, so it is treated the same.
--         */
--        if (num_rx_threads < 1)
--                num_rx_threads = smp_num_cpus;
--        /* rxthread_pid[] is indexed up to NRXTHREADS above; never start
--         * more threads than it has slots for */
--        if (num_rx_threads > NRXTHREADS) {
--                CDEBUG(D_WARNING, "limiting receive threads to [%d]\n",
--                       NRXTHREADS);
--                num_rx_threads = NRXTHREADS;
--        }
--
--        CDEBUG(D_INFO, "Starting [%d] receive threads\n", num_rx_threads);
--        for (threads = 0; threads < num_rx_threads; threads++) {
--                nal_data->rxthread_pid[threads] =
--                        kernel_thread(gmnal_rx_thread, (void*)nal_data, 0);
--                if (nal_data->rxthread_pid[threads] <= 0) {
--                        CDEBUG(D_ERROR, "Receive thread failed to start\n");
--                        gmnal_stop_rxthread(nal_data);
--                        gmnal_stop_ctthread(nal_data);
--                        return(GMNAL_STATUS_FAIL);
--                }
--        }
--
--        /* poll, one tick at a time, until the rx threads report in */
--        for (;;) {
--                spin_lock(&nal_data->rxthread_flag_lock);
--                if (nal_data->rxthread_flag == GMNAL_RXTHREADS_STARTED) {
--                        spin_unlock(&nal_data->rxthread_flag_lock);
--                        break;
--                }
--                spin_unlock(&nal_data->rxthread_flag_lock);
--                gmnal_yield(1);
--        }
--
--        CDEBUG(D_INFO, "receive threads seem to have started\n");
--
--        return(GMNAL_STATUS_OK);
--}
+++ /dev/null
--# The kqswnal module is linked from qswnal.o and qswnal_cb.o.
--MODULES := kqswnal
--kqswnal-objs := qswnal.o qswnal_cb.o
--
--# NOTE(review): the next two lines come from different revisions of this
--# file (EXTRA_PRE_CFLAGS in one, EXTRA_POST_CFLAGS in the other).
- EXTRA_PRE_CFLAGS := @QSWCPPFLAGS@ -I/usr/include
-EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include
--
--@INCLUDE_RULES@
+++ /dev/null
--/*
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- * Author: Eric Barton <eric@bartonsoftware.com>
-- *
-- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
-- * W. Marcus Miller - Based on ksocknal
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#include "qswnal.h"
--
--/* Module-wide state.  NOTE(review): two revisions are interleaved here;
-- * kqswnal_ni is declared by both, kqswnal_tunables by only one. */
- ptl_handle_ni_t kqswnal_ni;
--nal_t kqswnal_api;
--kqswnal_data_t kqswnal_data;
-ptl_handle_ni_t kqswnal_ni;
-kqswnal_tunables_t kqswnal_tunables;
--
--/* Router registration record: packets are forwarded through
-- * kqswnal_fwd_packet; no notify callback and no private argument. */
--kpr_nal_interface_t kqswnal_router_interface = {
-- kprni_nalid: QSWNAL,
-- kprni_arg: NULL,
-- kprni_fwd: kqswnal_fwd_packet,
-- kprni_notify: NULL, /* we're connectionless */
--};
--
--/* sysctl tables, only built when CONFIG_SYSCTL is set: a "qswnal"
-- * directory (mode 0555) holding integer tunables (mode 0644) served by
-- * proc_dointvec.
-- *
-- * NOTE(review): two revisions are interleaved below.  One exposes
-- * optimized_gets and copy_small_fwd from kqswnal_data; the other exposes
-- * optimized_puts and optimized_gets from kqswnal_tunables.  In the
-- * latter, both entries are registered under QSWNAL_SYSCTL_OPTIMIZED_GETS,
-- * i.e. they share one ctl id -- confirm whether that is intended. */
--#if CONFIG_SYSCTL
--#define QSWNAL_SYSCTL 201
--
--#define QSWNAL_SYSCTL_OPTIMIZED_GETS 1
--#define QSWNAL_SYSCTL_COPY_SMALL_FWD 2
--
--static ctl_table kqswnal_ctl_table[] = {
- {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
- &kqswnal_data.kqn_optimized_gets, sizeof (int),
- {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_puts",
- &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
-- 0644, NULL, &proc_dointvec},
- {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd",
- &kqswnal_data.kqn_copy_small_fwd, sizeof (int),
- {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
- &kqswnal_tunables.kqn_optimized_gets, sizeof (int),
-- 0644, NULL, &proc_dointvec},
-- {0}
--};
--
--static ctl_table kqswnal_top_ctl_table[] = {
-- {QSWNAL_SYSCTL, "qswnal", NULL, 0, 0555, kqswnal_ctl_table},
-- {0}
--};
--#endif
-
- /*
-  * API-side dispatch hook: hand request 'id' with its argument and return
-  * buffers to lib_dispatch() on the library side of this NAL.
-  * args_len and ret_len are not used.  Always returns PTL_OK.
-  */
- static int
- kqswnal_forward(nal_t *nal, int id, void *args, size_t args_len,
-                 void *ret, size_t ret_len)
- {
-         kqswnal_data_t *k;
-         nal_cb_t       *nal_cb;
-
-         k = nal->nal_data;
-         nal_cb = k->kqn_cb;
-
-         /* only the single static instance of this NAL is ever expected */
-         LASSERT (nal == &kqswnal_api);
-         LASSERT (k == &kqswnal_data);
-         LASSERT (nal_cb == &kqswnal_lib);
-
-         lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
-         return (PTL_OK);
- }
-
- /*
-  * API lock hook: delegates to the library NAL's cb_cli callback, passing
-  * 'flags' through.
-  */
- static void
- kqswnal_lock (nal_t *nal, unsigned long *flags)
- {
-         kqswnal_data_t *k;
-         nal_cb_t       *nal_cb;
-
-         k = nal->nal_data;
-         nal_cb = k->kqn_cb;
-
-         LASSERT (nal == &kqswnal_api);
-         LASSERT (k == &kqswnal_data);
-         LASSERT (nal_cb == &kqswnal_lib);
-
-         nal_cb->cb_cli(nal_cb,flags);
- }
-
- /*
-  * API unlock hook: delegates to the library NAL's cb_sti callback,
-  * passing 'flags' through.
-  */
- static void
- kqswnal_unlock(nal_t *nal, unsigned long *flags)
- {
-         kqswnal_data_t *k;
-         nal_cb_t       *nal_cb;
-
-         k = nal->nal_data;
-         nal_cb = k->kqn_cb;
-
-         LASSERT (nal == &kqswnal_api);
-         LASSERT (k == &kqswnal_data);
-         LASSERT (nal_cb == &kqswnal_lib);
-
-         nal_cb->cb_sti(nal_cb,flags);
- }
-
- /*
-  * API shutdown hook.  Logs the call, checks that 'nal' is this NAL's API
-  * object and returns 0; 'ni' is not used.
-  */
- static int
- kqswnal_shutdown(nal_t *nal, int ni)
- {
-         CDEBUG (D_NET, "shutdown\n");
-         LASSERT (nal == &kqswnal_api);
-
-         return (0);
- }
-
- /*
-  * API yield hook: logs the call and calls schedule() only when
-  * need_resched() says a reschedule is pending.  'nal' is not used.
-  */
- static void
- kqswnal_yield( nal_t *nal )
- {
-         CDEBUG (D_NET, "yield\n");
-
-         if (!need_resched())
-                 return;
-
-         schedule();
- }
-
- /*
-  * API init hook: initialises the library NAL via lib_init() with this
-  * node's NID (derived from kqn_elanid) and the node count kqn_nnodes,
-  * then returns the API object.  'interface' and 'requested_pid' are not
-  * used, and the result of lib_init() is not examined.
-  */
- static nal_t *
- kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
-              ptl_pid_t requested_pid)
- {
-         ptl_nid_t mynid;
-         int       nnids;
-
-         mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid);
-         nnids = kqswnal_data.kqn_nnodes;
-
-         CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids);
-
-         lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);
-
-         return (&kqswnal_api);
- }
--
--/*
-- * Describe one active transmit descriptor through 'pcfg'.
-- *
-- * On entry pcfg->pcfg_count is the index of the wanted descriptor on
-- * kqn_activetxds; the list is walked with kqn_idletxd_lock held and
-- * interrupts saved.  On a hit the descriptor's address, header type,
-- * payload length, destination nid, ktx_nid, launcher and a flags word
-- * (bit 0: on the delayed list, bit 1: ktx_isnblk, remaining bits:
-- * ktx_state) are stored in pcfg.
-- *
-- * Returns 0 if a descriptor with that index exists, -ENOENT otherwise.
-- *
-- * NOTE(review): two revisions are interleaved below; one reads the header
-- * fields through ktx->ktx_wire_hdr with NTOH__u32/NTOH__u64, the other
-- * through hdr = ktx->ktx_buffer with le32_to_cpu/le64_to_cpu.
-- */
--int
--kqswnal_get_tx_desc (struct portals_cfg *pcfg)
--{
-- unsigned long flags;
-- struct list_head *tmp;
-- kqswnal_tx_t *ktx;
- ptl_hdr_t *hdr;
-- int index = pcfg->pcfg_count;
-- int rc = -ENOENT;
--
-- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
--
-- list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
-- if (index-- != 0)
-- continue;
--
-- ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);
- hdr = (ptl_hdr_t *)ktx->ktx_buffer;
--
-- pcfg->pcfg_pbuf1 = (char *)ktx;
- pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type);
- pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length);
- pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid);
- pcfg->pcfg_count = le32_to_cpu(hdr->type);
- pcfg->pcfg_size = le32_to_cpu(hdr->payload_length);
- pcfg->pcfg_nid = le64_to_cpu(hdr->dest_nid);
-- pcfg->pcfg_nid2 = ktx->ktx_nid;
-- pcfg->pcfg_misc = ktx->ktx_launcher;
-- pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
-- (!ktx->ktx_isnblk ? 0 : 2) |
-- (ktx->ktx_state << 2);
-- rc = 0;
-- break;
-- }
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-- return (rc);
--}
--
--/*
-- * NAL command handler, dispatching on pcfg->pcfg_command.
-- *
-- * NAL_CMD_GET_TXDESC:     returns the result of kqswnal_get_tx_desc().
-- * NAL_CMD_REGISTER_MYNID: stores pcfg_nid - kqn_elanid as the NID offset,
-- *                         records pcfg_nid in kqswnal_lib and returns 0.
-- * anything else:          returns -EINVAL.
-- *
-- * 'private' is not used.
-- *
-- * NOTE(review): two revisions are interleaved below; they store the nid
-- * in different fields of kqswnal_lib (ni.nid vs libnal_ni.ni_pid.nid).
-- */
--int
--kqswnal_cmd (struct portals_cfg *pcfg, void *private)
--{
-- LASSERT (pcfg != NULL);
--
-- switch (pcfg->pcfg_command) {
-- case NAL_CMD_GET_TXDESC:
-- return (kqswnal_get_tx_desc (pcfg));
--
-- case NAL_CMD_REGISTER_MYNID:
-- CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
-- pcfg->pcfg_nid - kqswnal_data.kqn_elanid,
-- kqswnal_data.kqn_nid_offset);
-- kqswnal_data.kqn_nid_offset =
-- pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
- kqswnal_lib.ni.nid = pcfg->pcfg_nid;
- kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
-- return (0);
--
-- default:
-- return (-EINVAL);
-- }
--}
--
--/*
-- * Tear the QSW NAL down again.
-- *
-- * NOTE(review): two revisions of this function are interleaved below and
-- * cannot be read as one compilable body: kqswnal_finalise(void), the
-- * module exit routine, and kqswnal_shutdown(nal_t *nal), which first
-- * returns early (dropping a module use) while nal->nal_refct is non-zero.
-- *
-- * The order of the stages, as far as both revisions show it:
-- *   1. undo the registrations made for the init stage recorded in
-- *      kqn_init; KQN_INIT_NOTHING returns at once,
-- *   2. kpr_shutdown() the router, then set kqn_shuttingdown and wait,
-- *      one second at a time, for the condition each revision polls,
-- *   3. free the Elan receivers and the transmitter,
-- *   4. set kqn_shuttingdown = 2, wake the scheduler wait queue and wait
-- *      for kqn_nthreads to reach zero,
-- *   5. complete any still-queued forwarding descriptors with an error
-- *      and kpr_deregister() from the router,
-- *   6. unmap and release DMA space, free all tx/rx descriptors, buffers
-- *      and pages,
-- *   7. zero kqswnal_data and log the final memory count.
-- */
- void __exit
- kqswnal_finalise (void)
-static void
-kqswnal_shutdown(nal_t *nal)
--{
- kqswnal_tx_t *ktx;
- kqswnal_rx_t *krx;
- unsigned long flags;
- int do_lib_fini = 0;
-
- /* NB The first ref was this module! */
- if (nal->nal_refct != 0) {
- PORTAL_MODULE_UNUSE;
- return;
- }
-
- CDEBUG (D_NET, "shutdown\n");
- LASSERT (nal == &kqswnal_api);
--
-- switch (kqswnal_data.kqn_init)
-- {
-- default:
-- LASSERT (0);
--
-- case KQN_INIT_ALL:
- #if CONFIG_SYSCTL
- if (kqswnal_data.kqn_sysctl != NULL)
- unregister_sysctl_table (kqswnal_data.kqn_sysctl);
- #endif
- PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
- kportal_nal_unregister(QSWNAL);
- libcfs_nal_cmd_unregister(QSWNAL);
-- /* fall through */
--
- case KQN_INIT_PTL:
- PtlNIFini (kqswnal_ni);
- lib_fini (&kqswnal_lib);
- case KQN_INIT_LIB:
- do_lib_fini = 1;
-- /* fall through */
--
-- case KQN_INIT_DATA:
-- break;
--
-- case KQN_INIT_NOTHING:
-- return;
-- }
--
-- /**********************************************************************/
- /* Make router stop her calling me and fail any more call-ins */
- /* Tell router we're shutting down. Any router calls my threads
- * make will now fail immediately and the router will stop calling
- * into me. */
-- kpr_shutdown (&kqswnal_data.kqn_router);
-
-
-- /**********************************************************************/
- /* flag threads we've started to terminate and wait for all to ack */
-
- /* Signal the start of shutdown... */
- spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
-- kqswnal_data.kqn_shuttingdown = 1;
- wake_up_all (&kqswnal_data.kqn_sched_waitq);
- spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
--
- while (atomic_read (&kqswnal_data.kqn_nthreads_running) != 0) {
- CDEBUG(D_NET, "waiting for %d threads to start shutting down\n",
- atomic_read (&kqswnal_data.kqn_nthreads_running));
- wake_up_all(&kqswnal_data.kqn_idletxd_waitq);
-
- /**********************************************************************/
- /* wait for sends that have allocated a tx desc to launch or give up */
- while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) {
- CDEBUG(D_NET, "waiting for %d pending sends\n",
- atomic_read (&kqswnal_data.kqn_pending_txs));
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (HZ);
-- }
--
-- /**********************************************************************/
-- /* close elan comms */
--#if MULTIRAIL_EKC
- /* Shut down receivers first; rx callbacks might try sending... */
-- if (kqswnal_data.kqn_eprx_small != NULL)
-- ep_free_rcvr (kqswnal_data.kqn_eprx_small);
--
-- if (kqswnal_data.kqn_eprx_large != NULL)
-- ep_free_rcvr (kqswnal_data.kqn_eprx_large);
-
- /* NB ep_free_rcvr() returns only after we've freed off all receive
- * buffers (see shutdown handling in kqswnal_requeue_rx()). This
- * means we must have completed any messages we passed to
- * lib_parse() or kpr_fwd_start(). */
--
-- if (kqswnal_data.kqn_eptx != NULL)
-- ep_free_xmtr (kqswnal_data.kqn_eptx);
--
- /* freeing the xmtr completes all txs pdq */
- /* NB ep_free_xmtr() returns only after all outstanding transmits
- * have called their callback... */
-- LASSERT(list_empty(&kqswnal_data.kqn_activetxds));
--#else
- /* "Old" EKC just pretends to shutdown cleanly but actually
- * provides no guarantees */
-- if (kqswnal_data.kqn_eprx_small != NULL)
-- ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
--
-- if (kqswnal_data.kqn_eprx_large != NULL)
-- ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
--
-- /* wait for transmits to complete */
-- while (!list_empty(&kqswnal_data.kqn_activetxds)) {
-- CWARN("waiting for active transmits to complete\n");
-- set_current_state(TASK_UNINTERRUPTIBLE);
-- schedule_timeout(HZ);
-- }
--
-- if (kqswnal_data.kqn_eptx != NULL)
-- ep_free_large_xmtr (kqswnal_data.kqn_eptx);
--#endif
-- /**********************************************************************/
-- /* flag threads to terminate, wake them and wait for them to die */
-
-- kqswnal_data.kqn_shuttingdown = 2;
-- wake_up_all (&kqswnal_data.kqn_sched_waitq);
--
-- while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
-- CDEBUG(D_NET, "waiting for %d threads to terminate\n",
-- atomic_read (&kqswnal_data.kqn_nthreads));
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (HZ);
-- }
--
-- /**********************************************************************/
-- /* No more threads. No more portals, router or comms callbacks!
-- * I control the horizontals and the verticals...
-- */
--
--#if MULTIRAIL_EKC
-- LASSERT (list_empty (&kqswnal_data.kqn_readyrxds));
- LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds));
- LASSERT (list_empty (&kqswnal_data.kqn_delayedfwds));
--#endif
--
-- /**********************************************************************/
- /* Complete any blocked forwarding packets with error
- /* Complete any blocked forwarding packets, with error
-- */
--
-- while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
-- {
-- kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
- kpr_fwd_desc_t, kprfd_list);
- list_del (&fwd->kprfd_list);
- kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
- }
-
- while (!list_empty (&kqswnal_data.kqn_delayedfwds))
- {
- kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next,
-- kpr_fwd_desc_t, kprfd_list);
-- list_del (&fwd->kprfd_list);
- kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
- kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -ESHUTDOWN);
-- }
--
-- /**********************************************************************/
- /* Wait for router to complete any packets I sent her
- */
- /* finalise router and portals lib */
--
-- kpr_deregister (&kqswnal_data.kqn_router);
--
- if (do_lib_fini)
- lib_fini (&kqswnal_lib);
--
-- /**********************************************************************/
-- /* Unmap message buffers and free all descriptors and buffers
-- */
--
--#if MULTIRAIL_EKC
-- /* FTTB, we need to unmap any remaining mapped memory. When
-- * ep_dvma_release() get fixed (and releases any mappings in the
-- * region), we can delete all the code from here --------> */
--
- for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx =ktx->ktx_alloclist){
- /* If ktx has a buffer, it got mapped; unmap now. NB only
- * the pre-mapped stuff is still mapped since all tx descs
- * must be idle */
- if (kqswnal_data.kqn_txds != NULL) {
- int i;
--
- if (ktx->ktx_buffer != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_ebuffer);
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
-
- /* If ktx has a buffer, it got mapped; unmap now.
- * NB only the pre-mapped stuff is still mapped
- * since all tx descs must be idle */
-
- if (ktx->ktx_buffer != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_ebuffer);
- }
-- }
--
- for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){
- /* If krx_kiov[0].kiov_page got allocated, it got mapped.
- * NB subsequent pages get merged */
- if (kqswnal_data.kqn_rxds != NULL) {
- int i;
--
- if (krx->krx_kiov[0].kiov_page != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_rx_nmh,
- &krx->krx_elanbuffer);
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
- /* If krx_kiov[0].kiov_page got allocated, it got mapped.
- * NB subsequent pages get merged */
-
- if (krx->krx_kiov[0].kiov_page != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_rx_nmh,
- &krx->krx_elanbuffer);
- }
-- }
-- /* <----------- to here */
--
-- if (kqswnal_data.kqn_ep_rx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_rx_nmh);
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);
--
-- if (kqswnal_data.kqn_ep_tx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_tx_nmh);
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
--#else
-- if (kqswnal_data.kqn_eprxdmahandle != NULL)
-- {
-- elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eprxdmahandle, 0,
-- KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
-- KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);
--
-- elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eprxdmahandle);
-- }
--
-- if (kqswnal_data.kqn_eptxdmahandle != NULL)
-- {
-- elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eptxdmahandle, 0,
-- KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
-- KQSW_NNBLK_TXMSGS));
--
-- elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eptxdmahandle);
-- }
--#endif
--
- while (kqswnal_data.kqn_txds != NULL) {
- ktx = kqswnal_data.kqn_txds;
- if (kqswnal_data.kqn_txds != NULL)
- {
- int i;
--
- if (ktx->ktx_buffer != NULL)
- PORTAL_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
- {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
--
- kqswnal_data.kqn_txds = ktx->ktx_alloclist;
- PORTAL_FREE(ktx, sizeof(*ktx));
- if (ktx->ktx_buffer != NULL)
- PORTAL_FREE(ktx->ktx_buffer,
- KQSW_TX_BUFFER_SIZE);
- }
-
- PORTAL_FREE(kqswnal_data.kqn_txds,
- sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
- KQSW_NNBLK_TXMSGS));
-- }
--
- while (kqswnal_data.kqn_rxds != NULL) {
- int i;
- if (kqswnal_data.kqn_rxds != NULL)
- {
- int i;
- int j;
--
- krx = kqswnal_data.kqn_rxds;
- for (i = 0; i < krx->krx_npages; i++)
- if (krx->krx_kiov[i].kiov_page != NULL)
- __free_page (krx->krx_kiov[i].kiov_page);
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
--
- kqswnal_data.kqn_rxds = krx->krx_alloclist;
- PORTAL_FREE(krx, sizeof (*krx));
- for (j = 0; j < krx->krx_npages; j++)
- if (krx->krx_kiov[j].kiov_page != NULL)
- __free_page (krx->krx_kiov[j].kiov_page);
- }
-
- PORTAL_FREE(kqswnal_data.kqn_rxds,
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
- KQSW_NRXMSGS_LARGE));
-- }
--
-- /* resets flags, pointers to NULL etc */
-- memset(&kqswnal_data, 0, sizeof (kqswnal_data));
--
-- CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
--
-- printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n",
-- atomic_read(&portal_kmemory));
--}
--
- static int __init
- kqswnal_initialise (void)
-static int
-kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
--#if MULTIRAIL_EKC
-- EP_RAILMASK all_rails = EP_RAILMASK_ALL;
--#else
-- ELAN3_DMA_REQUEST dmareq;
--#endif
-- int rc;
-- int i;
- kqswnal_rx_t *krx;
- kqswnal_tx_t *ktx;
-- int elan_page_idx;
- ptl_process_id_t my_process_id;
-- int pkmem = atomic_read(&portal_kmemory);
--
- LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
- LASSERT (nal == &kqswnal_api);
--
- CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
- if (nal->nal_refct != 0) {
- if (actual_limits != NULL)
- *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
- /* This module got the first ref */
- PORTAL_MODULE_USE;
- return (PTL_OK);
- }
--
- kqswnal_api.forward = kqswnal_forward;
- kqswnal_api.shutdown = kqswnal_shutdown;
- kqswnal_api.yield = kqswnal_yield;
- kqswnal_api.validate = NULL; /* our api validate is a NOOP */
- kqswnal_api.lock = kqswnal_lock;
- kqswnal_api.unlock = kqswnal_unlock;
- kqswnal_api.nal_data = &kqswnal_data;
- LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
--
- kqswnal_lib.nal_data = &kqswnal_data;
- CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
--
- memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
- memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
- #if MULTIRAIL_EKC
- kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
- #else
- kqswnal_rpc_failed.Status = -ECONNREFUSED;
- #endif
-- /* ensure all pointers NULL etc */
-- memset (&kqswnal_data, 0, sizeof (kqswnal_data));
-
- kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
- kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD;
-
- kqswnal_data.kqn_cb = &kqswnal_lib;
--
-- INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
-- INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
-- INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
-- spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
-- init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
-- INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);
--
-- INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
-- INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
-- INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
--
-- spin_lock_init (&kqswnal_data.kqn_sched_lock);
-- init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
--
- spin_lock_init (&kqswnal_data.kqn_statelock);
- /* Leave kqn_rpc_success zeroed */
-#if MULTIRAIL_EKC
- kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
- kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
-#endif
--
-- /* pointers/lists/locks initialised */
-- kqswnal_data.kqn_init = KQN_INIT_DATA;
-
-
--#if MULTIRAIL_EKC
-- kqswnal_data.kqn_ep = ep_system();
-- if (kqswnal_data.kqn_ep == NULL) {
-- CERROR("Can't initialise EKC\n");
- return (-ENODEV);
- kqswnal_shutdown(nal);
- return (PTL_IFACE_INVALID);
-- }
--
-- if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
-- CERROR("Can't get elan ID\n");
- kqswnal_finalise();
- return (-ENODEV);
- kqswnal_shutdown(nal);
- return (PTL_IFACE_INVALID);
-- }
--#else
-- /**********************************************************************/
-- /* Find the first Elan device */
--
-- kqswnal_data.kqn_ep = ep_device (0);
-- if (kqswnal_data.kqn_ep == NULL)
-- {
-- CERROR ("Can't get elan device 0\n");
- return (-ENODEV);
- kqswnal_shutdown(nal);
- return (PTL_IFACE_INVALID);
-- }
--#endif
--
-- kqswnal_data.kqn_nid_offset = 0;
-- kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_ep);
-- kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_ep);
--
-- /**********************************************************************/
-- /* Get the transmitter */
--
-- kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
-- if (kqswnal_data.kqn_eptx == NULL)
-- {
-- CERROR ("Can't allocate transmitter\n");
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--
-- /**********************************************************************/
-- /* Get the receivers */
--
-- kqswnal_data.kqn_eprx_small = ep_alloc_rcvr (kqswnal_data.kqn_ep,
-- EP_MSG_SVC_PORTALS_SMALL,
-- KQSW_EP_ENVELOPES_SMALL);
-- if (kqswnal_data.kqn_eprx_small == NULL)
-- {
-- CERROR ("Can't install small msg receiver\n");
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--
-- kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep,
-- EP_MSG_SVC_PORTALS_LARGE,
-- KQSW_EP_ENVELOPES_LARGE);
-- if (kqswnal_data.kqn_eprx_large == NULL)
-- {
-- CERROR ("Can't install large msg receiver\n");
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--
-- /**********************************************************************/
-- /* Reserve Elan address space for transmit descriptors NB we may
-- * either send the contents of associated buffers immediately, or
-- * map them for the peer to suck/blow... */
--#if MULTIRAIL_EKC
-- kqswnal_data.kqn_ep_tx_nmh =
-- ep_dvma_reserve(kqswnal_data.kqn_ep,
-- KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
-- EP_PERM_WRITE);
-- if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
-- CERROR("Can't reserve tx dma space\n");
- kqswnal_finalise();
- return (-ENOMEM);
- kqswnal_shutdown(nal);
- return (PTL_NO_SPACE);
-- }
--#else
-- dmareq.Waitfn = DDI_DMA_SLEEP;
-- dmareq.ElanAddr = (E3_Addr) 0;
-- dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN;
-- dmareq.Perm = ELAN_PERM_REMOTEWRITE;
--
-- rc = elan3_dma_reserve(kqswnal_data.kqn_ep->DmaState,
-- KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
-- &dmareq, &kqswnal_data.kqn_eptxdmahandle);
-- if (rc != DDI_SUCCESS)
-- {
-- CERROR ("Can't reserve rx dma space\n");
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--#endif
-- /**********************************************************************/
-- /* Reserve Elan address space for receive buffers */
--#if MULTIRAIL_EKC
-- kqswnal_data.kqn_ep_rx_nmh =
-- ep_dvma_reserve(kqswnal_data.kqn_ep,
-- KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
-- KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
-- EP_PERM_WRITE);
-- if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
-- CERROR("Can't reserve rx dma space\n");
- kqswnal_finalise();
- return (-ENOMEM);
- kqswnal_shutdown(nal);
- return (PTL_NO_SPACE);
-- }
--#else
-- dmareq.Waitfn = DDI_DMA_SLEEP;
-- dmareq.ElanAddr = (E3_Addr) 0;
-- dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN;
-- dmareq.Perm = ELAN_PERM_REMOTEWRITE;
--
-- rc = elan3_dma_reserve (kqswnal_data.kqn_ep->DmaState,
-- KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
-- KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
-- &dmareq, &kqswnal_data.kqn_eprxdmahandle);
-- if (rc != DDI_SUCCESS)
-- {
-- CERROR ("Can't reserve rx dma space\n");
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--#endif
-- /**********************************************************************/
-- /* Allocate/Initialise transmit descriptors */
--
- kqswnal_data.kqn_txds = NULL;
- PORTAL_ALLOC(kqswnal_data.kqn_txds,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
- if (kqswnal_data.kqn_txds == NULL)
- {
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
- }
-
- /* clear flags, null pointers etc */
- memset(kqswnal_data.kqn_txds, 0,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
-- for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
-- {
-- int premapped_pages;
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
-- int basepage = i * KQSW_NTXMSGPAGES;
-
- PORTAL_ALLOC (ktx, sizeof(*ktx));
- if (ktx == NULL) {
- kqswnal_finalise ();
- return (-ENOMEM);
- }
-
- ktx->ktx_alloclist = kqswnal_data.kqn_txds;
- kqswnal_data.kqn_txds = ktx;
--
-- PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
-- if (ktx->ktx_buffer == NULL)
-- {
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--
-- /* Map pre-allocated buffer NOW, to save latency on transmit */
-- premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
-- KQSW_TX_BUFFER_SIZE);
--#if MULTIRAIL_EKC
-- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-- ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
-- kqswnal_data.kqn_ep_tx_nmh, basepage,
-- &all_rails, &ktx->ktx_ebuffer);
--#else
-- elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eptxdmahandle,
-- ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
-- basepage, &ktx->ktx_ebuffer);
--#endif
-- ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
-- ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
--
-- INIT_LIST_HEAD (&ktx->ktx_delayed_list);
--
-- ktx->ktx_state = KTX_IDLE;
-#if MULTIRAIL_EKC
- ktx->ktx_rail = -1; /* unset rail */
-#endif
-- ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
-- list_add_tail (&ktx->ktx_list,
-- ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
-- &kqswnal_data.kqn_idletxds);
-- }
--
-- /**********************************************************************/
-- /* Allocate/Initialise receive descriptors */
--
- kqswnal_data.kqn_rxds = NULL;
- PORTAL_ALLOC (kqswnal_data.kqn_rxds,
- sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
- if (kqswnal_data.kqn_rxds == NULL)
- {
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
- }
-
- memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
-
-- elan_page_idx = 0;
-- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
-- {
--#if MULTIRAIL_EKC
-- EP_NMD elanbuffer;
--#else
-- E3_Addr elanbuffer;
--#endif
-- int j;
-
- PORTAL_ALLOC(krx, sizeof(*krx));
- if (krx == NULL) {
- kqswnal_finalise();
- return (-ENOSPC);
- }
-
- krx->krx_alloclist = kqswnal_data.kqn_rxds;
- kqswnal_data.kqn_rxds = krx;
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
--
-- if (i < KQSW_NRXMSGS_SMALL)
-- {
-- krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
-- krx->krx_eprx = kqswnal_data.kqn_eprx_small;
-- }
-- else
-- {
-- krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
-- krx->krx_eprx = kqswnal_data.kqn_eprx_large;
-- }
--
-- LASSERT (krx->krx_npages > 0);
-- for (j = 0; j < krx->krx_npages; j++)
-- {
-- struct page *page = alloc_page(GFP_KERNEL);
--
-- if (page == NULL) {
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_NO_SPACE);
-- }
--
-- krx->krx_kiov[j].kiov_page = page;
-- LASSERT(page_address(page) != NULL);
--
--#if MULTIRAIL_EKC
-- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-- page_address(page),
-- PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
-- elan_page_idx, &all_rails, &elanbuffer);
--
-- if (j == 0) {
-- krx->krx_elanbuffer = elanbuffer;
-- } else {
-- rc = ep_nmd_merge(&krx->krx_elanbuffer,
-- &krx->krx_elanbuffer,
-- &elanbuffer);
-- /* NB contiguous mapping */
-- LASSERT(rc);
-- }
--#else
-- elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eprxdmahandle,
-- page_address(page),
-- PAGE_SIZE, elan_page_idx,
-- &elanbuffer);
-- if (j == 0)
-- krx->krx_elanbuffer = elanbuffer;
--
-- /* NB contiguous mapping */
-- LASSERT (elanbuffer == krx->krx_elanbuffer + j * PAGE_SIZE);
--#endif
-- elan_page_idx++;
--
-- }
-- }
-- LASSERT (elan_page_idx ==
-- (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
-- (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
--
-- /**********************************************************************/
-- /* Network interface ready to initialise */
--
- rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
- if (rc != 0)
- my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
- my_process_id.pid = requested_pid;
-
- rc = lib_init(&kqswnal_lib, nal, my_process_id,
- requested_limits, actual_limits);
- if (rc != PTL_OK)
-- {
- CERROR ("PtlNIInit failed %d\n", rc);
- kqswnal_finalise ();
- return (-ENOMEM);
- CERROR ("lib_init failed %d\n", rc);
- kqswnal_shutdown (nal);
- return (rc);
-- }
--
- kqswnal_data.kqn_init = KQN_INIT_PTL;
- kqswnal_data.kqn_init = KQN_INIT_LIB;
--
-- /**********************************************************************/
-- /* Queue receives, now that it's OK to run their completion callbacks */
--
- for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
-- /* NB this enqueue can allocate/sleep (attr == 0) */
- krx->krx_state = KRX_POSTED;
--#if MULTIRAIL_EKC
-- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-- &krx->krx_elanbuffer, 0);
--#else
-- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-- krx->krx_elanbuffer,
-- krx->krx_npages * PAGE_SIZE, 0);
--#endif
-- if (rc != EP_SUCCESS)
-- {
-- CERROR ("failed ep_queue_receive %d\n", rc);
- kqswnal_finalise ();
- return (-ENOMEM);
- kqswnal_shutdown (nal);
- return (PTL_FAIL);
-- }
-- }
--
-- /**********************************************************************/
-- /* Spawn scheduling threads */
-- for (i = 0; i < num_online_cpus(); i++) {
-- rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
-- if (rc != 0)
-- {
-- CERROR ("failed to spawn scheduling thread: %d\n", rc);
- kqswnal_finalise ();
- return (rc);
- kqswnal_shutdown (nal);
- return (PTL_FAIL);
-- }
-- }
--
-- /**********************************************************************/
-- /* Connect to the router */
-- rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
-- CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);
--
- rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL);
- rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
-- if (rc != 0) {
-- CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- kqswnal_finalise ();
- return (rc);
- kqswnal_shutdown (nal);
- return (PTL_FAIL);
-- }
-
- #if CONFIG_SYSCTL
- /* Press on regardless even if registering sysctl doesn't work */
- kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0);
- #endif
--
- PORTAL_SYMBOL_REGISTER(kqswnal_ni);
-- kqswnal_data.kqn_init = KQN_INIT_ALL;
--
-- printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d "
-- "(Routing %s, initial mem %d)\n",
-- kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
-- kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
-- pkmem);
--
- return (0);
- return (PTL_OK);
-}
-
-void __exit
-kqswnal_finalise (void)
-{
-#if CONFIG_SYSCTL
- if (kqswnal_tunables.kqn_sysctl != NULL)
- unregister_sysctl_table (kqswnal_tunables.kqn_sysctl);
-#endif
- PtlNIFini(kqswnal_ni);
-
- ptl_unregister_nal(QSWNAL);
--}
-
-static int __init
-kqswnal_initialise (void)
-{
- int rc;
-
- kqswnal_api.nal_ni_init = kqswnal_startup;
- kqswnal_api.nal_ni_fini = kqswnal_shutdown;
-
- /* Initialise dynamic tunables to defaults once only */
- kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
- kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
-
- rc = ptl_register_nal(QSWNAL, &kqswnal_api);
- if (rc != PTL_OK) {
- CERROR("Can't register QSWNAL: %d\n", rc);
- return (-ENOMEM); /* or something... */
- }
-
- /* Pure gateways, and the workaround for 'EKC blocks forever until
- * the service is active' want the NAL started up at module load
- * time... */
- rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
- ptl_unregister_nal(QSWNAL);
- return (-ENODEV);
- }
--
-#if CONFIG_SYSCTL
- /* Press on regardless even if registering sysctl doesn't work */
- kqswnal_tunables.kqn_sysctl =
- register_sysctl_table (kqswnal_top_ctl_table, 0);
-#endif
- return (0);
-}
--
--MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
--MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01");
--MODULE_LICENSE("GPL");
--
--module_init (kqswnal_initialise);
--module_exit (kqswnal_finalise);
-
- EXPORT_SYMBOL (kqswnal_ni);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
- * Basic library routines.
- * Basic library routines.
-- *
-- */
--
--#ifndef _QSWNAL_H
--#define _QSWNAL_H
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include <qsnet/kernel.h>
--#undef printf /* nasty QSW #define */
--
--#include <linux/config.h>
--#include <linux/module.h>
--
--#if MULTIRAIL_EKC
--# include <elan/epcomms.h>
--#else
--# include <elan3/elanregs.h>
--# include <elan3/elandev.h>
--# include <elan3/elanvp.h>
--# include <elan3/elan3mmu.h>
--# include <elan3/elanctxt.h>
--# include <elan3/elandebug.h>
--# include <elan3/urom_addrs.h>
--# include <elan3/busops.h>
--# include <elan3/kcomm.h>
--#endif
--
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/stat.h>
--#include <linux/errno.h>
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--#include <linux/locks.h> /* wait_on_buffer */
--#else
--#include <linux/buffer_head.h> /* wait_on_buffer */
--#endif
--#include <linux/unistd.h>
--#include <net/sock.h>
--#include <linux/uio.h>
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--
--#include <linux/fs.h>
--#include <linux/file.h>
--#include <linux/stat.h>
--#include <linux/list.h>
--#include <linux/sysctl.h>
--#include <asm/segment.h>
--
--#define DEBUG_SUBSYSTEM S_QSWNAL
--
--#include <linux/kp30.h>
--#include <linux/kpr.h>
--#include <portals/p30.h>
--#include <portals/lib-p30.h>
--#include <portals/nal.h>
--
--#define KQSW_CHECKSUM 0
--#if KQSW_CHECKSUM
--typedef unsigned long kqsw_csum_t;
--#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t))
--#else
--#define KQSW_CSUM_SIZE 0
--#endif
--#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE)
--
--/*
-- * Performance Tuning defines
-- * NB no mention of PAGE_SIZE for interoperability
-- */
--#define KQSW_MAXPAYLOAD PTL_MTU
--#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
--
--#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */
--
--#define KQSW_NTXMSGS 8 /* # normal transmit messages */
--#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */
--
--#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */
--#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */
--
--#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */
--#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */
--
--#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */
--
- #define KQSW_OPTIMIZED_GETS 1 /* optimized gets? */
-#define KQSW_OPTIMIZED_GETS 1 /* optimize gets >= this size */
-#define KQSW_OPTIMIZED_PUTS (32<<10) /* optimize puts >= this size */
--#define KQSW_COPY_SMALL_FWD 0 /* copy small fwd messages to pre-mapped buffer? */
--
--/*
-- * derived constants
-- */
--
--#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG)
--/* The pre-allocated tx buffer (hdr + small payload) */
--
--#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1)
--/* Reserve elan address space for pre-allocated and pre-mapped transmit
-- * buffer and a full payload too. Extra pages allow for page alignment */
--
--#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD))
--/* receive hdr/payload always contiguous and page aligned */
--#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
--
--#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD))
--/* receive hdr/payload always contiguous and page aligned */
--#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
--/* biggest complete packet we can receive (or transmit) */
--
--/* Remote memory descriptor */
--typedef struct
--{
-- __u32 kqrmd_nfrag; /* # frags */
--#if MULTIRAIL_EKC
-- EP_NMD kqrmd_frag[0]; /* actual frags */
--#else
-- EP_IOVEC kqrmd_frag[0]; /* actual frags */
--#endif
--} kqswnal_remotemd_t;
--
- typedef struct kqswnal_rx
-typedef struct
--{
-- struct list_head krx_list; /* enqueue -> thread */
- struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */
-- EP_RCVR *krx_eprx; /* port to post receives to */
-- EP_RXD *krx_rxd; /* receive descriptor (for repost) */
--#if MULTIRAIL_EKC
-- EP_NMD krx_elanbuffer; /* contiguous Elan buffer */
--#else
-- E3_Addr krx_elanbuffer; /* contiguous Elan buffer */
--#endif
-- int krx_npages; /* # pages in receive buffer */
-- int krx_nob; /* Number Of Bytes received into buffer */
-- int krx_rpc_reply_needed; /* peer waiting for EKC RPC reply */
- int krx_rpc_reply_sent; /* rpc reply sent */
- int krx_rpc_reply_status; /* what status to send */
- int krx_state; /* what this RX is doing */
-- atomic_t krx_refcount; /* how to tell when rpc is done */
-- kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
-- ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
--} kqswnal_rx_t;
--
- typedef struct kqswnal_tx
-#define KRX_POSTED 1 /* receiving */
-#define KRX_PARSE 2 /* ready to be parsed */
-#define KRX_COMPLETING 3 /* waiting to be completed */
-
-
-typedef struct
--{
-- struct list_head ktx_list; /* enqueue idle/active */
-- struct list_head ktx_delayed_list; /* enqueue delayedtxds */
- struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */
-- unsigned int ktx_isnblk:1; /* reserved descriptor? */
-- unsigned int ktx_state:7; /* What I'm doing */
-- unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 0 : 1 */
-- uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */
-- int ktx_npages; /* pages reserved for mapping messages */
-- int ktx_nmappedpages; /* # pages mapped for current message */
-- int ktx_port; /* destination ep port */
-- ptl_nid_t ktx_nid; /* destination node */
- void *ktx_args[2]; /* completion passthru */
- void *ktx_args[3]; /* completion passthru */
-- char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
-- unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */
--
-- /* debug/info fields */
-- pid_t ktx_launcher; /* pid of launching process */
- ptl_hdr_t *ktx_wire_hdr; /* portals header (wire endian) */
--
-- int ktx_nfrag; /* # message frags */
--#if MULTIRAIL_EKC
- int ktx_rail; /* preferred rail */
-- EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */
-- EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */
--#else
-- E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */
-- EP_IOVEC ktx_frags[EP_MAXFRAG];/* msg frags (elan vaddrs) */
--#endif
--} kqswnal_tx_t;
--
--#define KTX_IDLE 0 /* on kqn_(nblk_)idletxds */
- #define KTX_SENDING 1 /* local send */
- #define KTX_FORWARDING 2 /* routing a packet */
- #define KTX_GETTING 3 /* local optimised get */
-#define KTX_FORWARDING 1 /* sending a forwarded packet */
-#define KTX_SENDING 2 /* normal send */
-#define KTX_GETTING 3 /* sending optimised get */
-#define KTX_PUTTING 4 /* sending optimised put */
-#define KTX_RDMAING 5 /* handling optimised put/get */
-
-typedef struct
-{
- /* dynamic tunables... */
- int kqn_optimized_puts; /* optimized PUTs? */
- int kqn_optimized_gets; /* optimized GETs? */
-#if CONFIG_SYSCTL
- struct ctl_table_header *kqn_sysctl; /* sysctl interface */
-#endif
-} kqswnal_tunables_t;
--
--typedef struct
--{
-- char kqn_init; /* what's been initialised */
-- char kqn_shuttingdown; /* I'm trying to shut down */
- atomic_t kqn_nthreads; /* # threads not terminated */
- atomic_t kqn_nthreads_running;/* # threads still running */
-
- int kqn_optimized_gets; /* optimized GETs? */
- int kqn_copy_small_fwd; /* fwd small msgs from pre-allocated buffer? */
- atomic_t kqn_nthreads; /* # threads running */
--
- #if CONFIG_SYSCTL
- struct ctl_table_header *kqn_sysctl; /* sysctl interface */
- #endif
- kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */
- kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */
- kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */
- kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */
--
-- struct list_head kqn_idletxds; /* transmit descriptors free to use */
-- struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */
-- struct list_head kqn_activetxds; /* transmit descriptors being used */
-- spinlock_t kqn_idletxd_lock; /* serialise idle txd access */
-- wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */
-- struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */
- atomic_t kqn_pending_txs; /* # transmits being prepped */
--
-- spinlock_t kqn_sched_lock; /* serialise packet schedulers */
-- wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */
--
-- struct list_head kqn_readyrxds; /* rxds full of data */
-- struct list_head kqn_delayedfwds; /* delayed forwards */
-- struct list_head kqn_delayedtxds; /* delayed transmits */
--
- spinlock_t kqn_statelock; /* cb_cli/cb_sti */
- nal_cb_t *kqn_cb; /* -> kqswnal_lib */
--#if MULTIRAIL_EKC
-- EP_SYS *kqn_ep; /* elan system */
-- EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */
-- EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */
--#else
-- EP_DEV *kqn_ep; /* elan device */
-- ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */
-- ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */
--#endif
-- EP_XMTR *kqn_eptx; /* elan transmitter */
-- EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */
-- EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */
-- kpr_router_t kqn_router; /* connection to Kernel Portals Router module */
--
-- ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */
-- int kqn_nnodes; /* this cluster's size */
-- int kqn_elanid; /* this nodes's elan ID */
-
- EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */
- EP_STATUSBLK kqn_rpc_failed;
--} kqswnal_data_t;
--
--/* kqn_init state */
--#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */
--#define KQN_INIT_DATA 1
- #define KQN_INIT_PTL 2
-#define KQN_INIT_LIB 2
--#define KQN_INIT_ALL 3
-
- extern nal_cb_t kqswnal_lib;
- extern nal_t kqswnal_api;
- extern kqswnal_data_t kqswnal_data;
--
- /* global pre-prepared replies to keep off the stack */
- extern EP_STATUSBLK kqswnal_rpc_success;
- extern EP_STATUSBLK kqswnal_rpc_failed;
-extern lib_nal_t kqswnal_lib;
-extern nal_t kqswnal_api;
-extern kqswnal_tunables_t kqswnal_tunables;
-extern kqswnal_data_t kqswnal_data;
--
--extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
--extern void kqswnal_rxhandler(EP_RXD *rxd);
--extern int kqswnal_scheduler (void *);
--extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
- extern void kqswnal_dma_reply_complete (EP_RXD *rxd);
- extern void kqswnal_requeue_rx (kqswnal_rx_t *krx);
-extern void kqswnal_rx_done (kqswnal_rx_t *krx);
--
--static inline ptl_nid_t
--kqswnal_elanid2nid (int elanid)
--{
-- return (kqswnal_data.kqn_nid_offset + elanid);
--}
--
--static inline int
--kqswnal_nid2elanid (ptl_nid_t nid)
--{
-- /* not in this cluster? */
-- if (nid < kqswnal_data.kqn_nid_offset ||
-- nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes)
-- return (-1);
--
-- return (nid - kqswnal_data.kqn_nid_offset);
-}
-
-static inline ptl_nid_t
-kqswnal_rx_nid(kqswnal_rx_t *krx)
-{
- return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)));
--}
--
--static inline int
--kqswnal_pages_spanned (void *base, int nob)
--{
-- unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT;
-- unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT;
--
-- LASSERT (last_page >= first_page); /* can't wrap address space */
-- return (last_page - first_page + 1);
--}
--
--#if KQSW_CHECKSUM
--static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
--{
-- unsigned char *ptr = (unsigned char *)base;
--
-- while (nob-- > 0)
-- sum += *ptr++;
--
-- return (sum);
--}
--#endif
--
- static inline void kqswnal_rx_done (kqswnal_rx_t *krx)
-static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
--{
-- LASSERT (atomic_read (&krx->krx_refcount) > 0);
-- if (atomic_dec_and_test (&krx->krx_refcount))
- kqswnal_requeue_rx(krx);
- kqswnal_rx_done(krx);
--}
--
--#if MULTIRAIL_EKC
--# ifndef EP_RAILMASK_ALL
--# error "old (unsupported) version of EKC headers"
--# endif
--#else
--/* multirail defines these in <elan/epcomms.h> */
--#define EP_MSG_SVC_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */
--#define EP_MSG_SVC_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */
--/* NB small/large message sizes are GLOBAL constants */
--
--/* A minimal attempt to minimise inline #ifdeffing */
--
--#define EP_SUCCESS ESUCCESS
--#define EP_ENOMEM ENOMEM
--
--static inline EP_XMTR *
--ep_alloc_xmtr(EP_DEV *e)
--{
-- return (ep_alloc_large_xmtr(e));
--}
--
--static inline EP_RCVR *
--ep_alloc_rcvr(EP_DEV *e, int svc, int nenv)
--{
-- return (ep_install_large_rcvr(e, svc, nenv));
--}
--
--static inline void
--ep_free_xmtr(EP_XMTR *x)
--{
-- ep_free_large_xmtr(x);
--}
--
--static inline void
--ep_free_rcvr(EP_RCVR *r)
--{
-- ep_remove_large_rcvr(r);
--}
--#endif
--
--#endif /* _QSWNAL_H */
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- * Author: Eric Barton <eric@bartonsoftware.com>
-- *
-- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
-- * W. Marcus Miller - Based on ksocknal
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#include "qswnal.h"
-
- EP_STATUSBLK kqswnal_rpc_success;
- EP_STATUSBLK kqswnal_rpc_failed;
--
--/*
-- * LIB functions follow
-- *
-- */
- static ptl_err_t
- kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr,
- size_t len)
- {
- CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n",
- nal->ni.nid, len, src_addr, dst_addr );
- memcpy( dst_addr, src_addr, len );
-
- return (PTL_OK);
- }
-
- static ptl_err_t
- kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr,
- size_t len)
- {
- CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n",
- nal->ni.nid, len, src_addr, dst_addr );
- memcpy( dst_addr, src_addr, len );
-
- return (PTL_OK);
- }
-
- static void *
- kqswnal_malloc(nal_cb_t *nal, size_t len)
- {
- void *buf;
-
- PORTAL_ALLOC(buf, len);
- return (buf);
- }
-
- static void
- kqswnal_free(nal_cb_t *nal, void *buf, size_t len)
- {
- PORTAL_FREE(buf, len);
- }
-
- static void
- kqswnal_printf (nal_cb_t * nal, const char *fmt, ...)
- {
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
- }
-
- #if (defined(CONFIG_SPARC32) || defined(CONFIG_SPARC64))
- # error "Can't save/restore irq contexts in different procedures"
- #endif
-
- static void
- kqswnal_cli(nal_cb_t *nal, unsigned long *flags)
- {
- kqswnal_data_t *data= nal->nal_data;
-
- spin_lock_irqsave(&data->kqn_statelock, *flags);
- }
-
-
- static void
- kqswnal_sti(nal_cb_t *nal, unsigned long *flags)
- {
- kqswnal_data_t *data= nal->nal_data;
-
- spin_unlock_irqrestore(&data->kqn_statelock, *flags);
- }
-
-
--static int
- kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
-kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
--{
- if (nid == nal->ni.nid)
- if (nid == nal->libnal_ni.ni_pid.nid)
-- *dist = 0; /* it's me */
-- else if (kqswnal_nid2elanid (nid) >= 0)
-- *dist = 1; /* it's my peer */
-- else
-- *dist = 2; /* via router */
-- return (0);
--}
--
--void
--kqswnal_notify_peer_down(kqswnal_tx_t *ktx)
--{
-- struct timeval now;
-- time_t then;
--
-- do_gettimeofday (&now);
-- then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ;
--
-- kpr_notify(&kqswnal_data.kqn_router, ktx->ktx_nid, 0, then);
--}
--
--void
--kqswnal_unmap_tx (kqswnal_tx_t *ktx)
--{
--#if MULTIRAIL_EKC
-- int i;
-
- ktx->ktx_rail = -1; /* unset rail */
--#endif
--
-- if (ktx->ktx_nmappedpages == 0)
-- return;
--
--#if MULTIRAIL_EKC
-- CDEBUG(D_NET, "%p unloading %d frags starting at %d\n",
-- ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag);
--
-- for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++)
-- ep_dvma_unload(kqswnal_data.kqn_ep,
-- kqswnal_data.kqn_ep_tx_nmh,
-- &ktx->ktx_frags[i]);
--#else
-- CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n",
-- ktx, ktx->ktx_nfrag, ktx->ktx_basepage, ktx->ktx_nmappedpages);
--
-- LASSERT (ktx->ktx_nmappedpages <= ktx->ktx_npages);
-- LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <=
-- kqswnal_data.kqn_eptxdmahandle->NumDvmaPages);
--
-- elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eptxdmahandle,
-- ktx->ktx_basepage, ktx->ktx_nmappedpages);
--#endif
-- ktx->ktx_nmappedpages = 0;
--}
--
--int
--kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, int niov, ptl_kiov_t *kiov)
--{
-- int nfrags = ktx->ktx_nfrag;
-- int nmapped = ktx->ktx_nmappedpages;
-- int maxmapped = ktx->ktx_npages;
-- uint32_t basepage = ktx->ktx_basepage + nmapped;
-- char *ptr;
--#if MULTIRAIL_EKC
-- EP_RAILMASK railmask;
- int rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
- EP_RAILMASK_ALL,
- kqswnal_nid2elanid(ktx->ktx_nid));
-
- int rail;
-
- if (ktx->ktx_rail < 0)
- ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
- EP_RAILMASK_ALL,
- kqswnal_nid2elanid(ktx->ktx_nid));
- rail = ktx->ktx_rail;
-- if (rail < 0) {
-- CERROR("No rails available for "LPX64"\n", ktx->ktx_nid);
-- return (-ENETDOWN);
-- }
-- railmask = 1 << rail;
--#endif
-- LASSERT (nmapped <= maxmapped);
-- LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
-- LASSERT (nfrags <= EP_MAXFRAG);
-- LASSERT (niov > 0);
-- LASSERT (nob > 0);
--
-- /* skip complete frags before 'offset' */
-- while (offset >= kiov->kiov_len) {
-- offset -= kiov->kiov_len;
-- kiov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
--
-- do {
-- int fraglen = kiov->kiov_len - offset;
--
- /* nob exactly spans the iovs */
- LASSERT (fraglen <= nob);
- /* each frag fits in a page */
- /* each page frag is contained in one page */
-- LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
-
- if (fraglen > nob)
- fraglen = nob;
--
-- nmapped++;
-- if (nmapped > maxmapped) {
-- CERROR("Can't map message in %d pages (max %d)\n",
-- nmapped, maxmapped);
-- return (-EMSGSIZE);
-- }
--
-- if (nfrags == EP_MAXFRAG) {
-- CERROR("Message too fragmented in Elan VM (max %d frags)\n",
-- EP_MAXFRAG);
-- return (-EMSGSIZE);
-- }
--
-- /* XXX this is really crap, but we'll have to kmap until
-- * EKC has a page (rather than vaddr) mapping interface */
--
-- ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
--
-- CDEBUG(D_NET,
-- "%p[%d] loading %p for %d, page %d, %d total\n",
-- ktx, nfrags, ptr, fraglen, basepage, nmapped);
--
--#if MULTIRAIL_EKC
-- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-- ptr, fraglen,
-- kqswnal_data.kqn_ep_tx_nmh, basepage,
-- &railmask, &ktx->ktx_frags[nfrags]);
--
-- if (nfrags == ktx->ktx_firsttmpfrag ||
-- !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
-- &ktx->ktx_frags[nfrags - 1],
-- &ktx->ktx_frags[nfrags])) {
-- /* new frag if this is the first or can't merge */
-- nfrags++;
-- }
--#else
-- elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eptxdmahandle,
-- ptr, fraglen,
-- basepage, &ktx->ktx_frags[nfrags].Base);
--
-- if (nfrags > 0 && /* previous frag mapped */
-- ktx->ktx_frags[nfrags].Base == /* contiguous with this one */
-- (ktx->ktx_frags[nfrags-1].Base + ktx->ktx_frags[nfrags-1].Len))
-- /* just extend previous */
-- ktx->ktx_frags[nfrags - 1].Len += fraglen;
-- else {
-- ktx->ktx_frags[nfrags].Len = fraglen;
-- nfrags++; /* new frag */
-- }
--#endif
--
-- kunmap (kiov->kiov_page);
--
-- /* keep in loop for failure case */
-- ktx->ktx_nmappedpages = nmapped;
--
-- basepage++;
-- kiov++;
-- niov--;
-- nob -= fraglen;
-- offset = 0;
--
-- /* iov must not run out before end of data */
-- LASSERT (nob == 0 || niov > 0);
--
-- } while (nob > 0);
--
-- ktx->ktx_nfrag = nfrags;
-- CDEBUG (D_NET, "%p got %d frags over %d pages\n",
-- ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
--
-- return (0);
--}
--
--int
--kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob,
-- int niov, struct iovec *iov)
--{
-- int nfrags = ktx->ktx_nfrag;
-- int nmapped = ktx->ktx_nmappedpages;
-- int maxmapped = ktx->ktx_npages;
-- uint32_t basepage = ktx->ktx_basepage + nmapped;
--#if MULTIRAIL_EKC
-- EP_RAILMASK railmask;
- int rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
- EP_RAILMASK_ALL,
- kqswnal_nid2elanid(ktx->ktx_nid));
- int rail;
--
- if (ktx->ktx_rail < 0)
- ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
- EP_RAILMASK_ALL,
- kqswnal_nid2elanid(ktx->ktx_nid));
- rail = ktx->ktx_rail;
-- if (rail < 0) {
-- CERROR("No rails available for "LPX64"\n", ktx->ktx_nid);
-- return (-ENETDOWN);
-- }
-- railmask = 1 << rail;
--#endif
-- LASSERT (nmapped <= maxmapped);
-- LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
-- LASSERT (nfrags <= EP_MAXFRAG);
-- LASSERT (niov > 0);
-- LASSERT (nob > 0);
--
-- /* skip complete frags before offset */
-- while (offset >= iov->iov_len) {
-- offset -= iov->iov_len;
-- iov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
--
-- do {
-- int fraglen = iov->iov_len - offset;
- long npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
- /* nob exactly spans the iovs */
- LASSERT (fraglen <= nob);
- long npages;
--
- if (fraglen > nob)
- fraglen = nob;
- npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
-- nmapped += npages;
-- if (nmapped > maxmapped) {
-- CERROR("Can't map message in %d pages (max %d)\n",
-- nmapped, maxmapped);
-- return (-EMSGSIZE);
-- }
--
-- if (nfrags == EP_MAXFRAG) {
-- CERROR("Message too fragmented in Elan VM (max %d frags)\n",
-- EP_MAXFRAG);
-- return (-EMSGSIZE);
-- }
--
-- CDEBUG(D_NET,
-- "%p[%d] loading %p for %d, pages %d for %ld, %d total\n",
-- ktx, nfrags, iov->iov_base + offset, fraglen,
-- basepage, npages, nmapped);
--
--#if MULTIRAIL_EKC
-- ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-- iov->iov_base + offset, fraglen,
-- kqswnal_data.kqn_ep_tx_nmh, basepage,
-- &railmask, &ktx->ktx_frags[nfrags]);
--
-- if (nfrags == ktx->ktx_firsttmpfrag ||
-- !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
-- &ktx->ktx_frags[nfrags - 1],
-- &ktx->ktx_frags[nfrags])) {
-- /* new frag if this is the first or can't merge */
-- nfrags++;
-- }
--#else
-- elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
-- kqswnal_data.kqn_eptxdmahandle,
-- iov->iov_base + offset, fraglen,
-- basepage, &ktx->ktx_frags[nfrags].Base);
--
-- if (nfrags > 0 && /* previous frag mapped */
-- ktx->ktx_frags[nfrags].Base == /* contiguous with this one */
-- (ktx->ktx_frags[nfrags-1].Base + ktx->ktx_frags[nfrags-1].Len))
-- /* just extend previous */
-- ktx->ktx_frags[nfrags - 1].Len += fraglen;
-- else {
-- ktx->ktx_frags[nfrags].Len = fraglen;
-- nfrags++; /* new frag */
-- }
--#endif
--
-- /* keep in loop for failure case */
-- ktx->ktx_nmappedpages = nmapped;
--
-- basepage += npages;
-- iov++;
-- niov--;
-- nob -= fraglen;
-- offset = 0;
--
-- /* iov must not run out before end of data */
-- LASSERT (nob == 0 || niov > 0);
--
-- } while (nob > 0);
--
-- ktx->ktx_nfrag = nfrags;
-- CDEBUG (D_NET, "%p got %d frags over %d pages\n",
-- ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
--
-- return (0);
--}
--
--
--void
--kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
--{
-- kpr_fwd_desc_t *fwd = NULL;
-- unsigned long flags;
--
-- kqswnal_unmap_tx (ktx); /* release temporary mappings */
-- ktx->ktx_state = KTX_IDLE;
--
-- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
--
-- list_del (&ktx->ktx_list); /* take off active list */
--
-- if (ktx->ktx_isnblk) {
-- /* reserved for non-blocking tx */
-- list_add (&ktx->ktx_list, &kqswnal_data.kqn_nblk_idletxds);
-- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-- return;
-- }
--
-- list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
--
-- /* anything blocking for a tx descriptor? */
- if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */
- if (!kqswnal_data.kqn_shuttingdown &&
- !list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */
-- {
-- CDEBUG(D_NET,"wakeup fwd\n");
--
-- fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
-- kpr_fwd_desc_t, kprfd_list);
-- list_del (&fwd->kprfd_list);
-- }
--
-- wake_up (&kqswnal_data.kqn_idletxd_waitq);
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
--
-- if (fwd == NULL)
-- return;
--
-- /* schedule packet for forwarding again */
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
--
-- list_add_tail (&fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds);
-- wake_up (&kqswnal_data.kqn_sched_waitq);
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
--}
--
--kqswnal_tx_t *
--kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block)
--{
-- unsigned long flags;
-- kqswnal_tx_t *ktx = NULL;
--
-- for (;;) {
-- spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
- if (kqswnal_data.kqn_shuttingdown)
- break;
--
-- /* "normal" descriptor is free */
-- if (!list_empty (&kqswnal_data.kqn_idletxds)) {
-- ktx = list_entry (kqswnal_data.kqn_idletxds.next,
-- kqswnal_tx_t, ktx_list);
-- break;
-- }
-
- /* "normal" descriptor pool is empty */
--
- if (fwd != NULL) { /* forwarded packet => queue for idle txd */
- CDEBUG (D_NET, "blocked fwd [%p]\n", fwd);
- list_add_tail (&fwd->kprfd_list,
- &kqswnal_data.kqn_idletxd_fwdq);
- if (fwd != NULL) /* forwarded packet? */
-- break;
- }
--
-- /* doing a local transmit */
-- if (!may_block) {
-- if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) {
-- CERROR ("intr tx desc pool exhausted\n");
-- break;
-- }
--
-- ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next,
-- kqswnal_tx_t, ktx_list);
-- break;
-- }
--
-- /* block for idle tx */
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
--
-- CDEBUG (D_NET, "blocking for tx desc\n");
-- wait_event (kqswnal_data.kqn_idletxd_waitq,
- !list_empty (&kqswnal_data.kqn_idletxds));
- !list_empty (&kqswnal_data.kqn_idletxds) ||
- kqswnal_data.kqn_shuttingdown);
-- }
--
-- if (ktx != NULL) {
-- list_del (&ktx->ktx_list);
-- list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds);
-- ktx->ktx_launcher = current->pid;
- atomic_inc(&kqswnal_data.kqn_pending_txs);
- } else if (fwd != NULL) {
- /* queue forwarded packet until idle txd available */
- CDEBUG (D_NET, "blocked fwd [%p]\n", fwd);
- list_add_tail (&fwd->kprfd_list,
- &kqswnal_data.kqn_idletxd_fwdq);
-- }
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
--
-- /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
-- LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0);
--
-- return (ktx);
--}
--
--void
--kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
--{
- lib_msg_t *msg;
- lib_msg_t *repmsg = NULL;
-
-- switch (ktx->ktx_state) {
-- case KTX_FORWARDING: /* router asked me to forward this packet */
-- kpr_fwd_done (&kqswnal_data.kqn_router,
-- (kpr_fwd_desc_t *)ktx->ktx_args[0], error);
-- break;
--
- case KTX_SENDING: /* packet sourced locally */
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
- case KTX_RDMAING: /* optimized GET/PUT handled */
- case KTX_PUTTING: /* optimized PUT sent */
- case KTX_SENDING: /* normal send */
- lib_finalize (&kqswnal_lib, NULL,
-- (lib_msg_t *)ktx->ktx_args[1],
- (error == 0) ? PTL_OK :
- (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
- (error == 0) ? PTL_OK : PTL_FAIL);
-- break;
-
- case KTX_GETTING: /* Peer has DMA-ed direct? */
- msg = (lib_msg_t *)ktx->ktx_args[1];
--
- if (error == 0) {
- repmsg = lib_fake_reply_msg (&kqswnal_lib,
- ktx->ktx_nid, msg->md);
- if (repmsg == NULL)
- error = -ENOMEM;
- }
-
- if (error == 0) {
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0],
- msg, PTL_OK);
- lib_finalize (&kqswnal_lib, NULL, repmsg, PTL_OK);
- } else {
- lib_finalize (&kqswnal_lib, ktx->ktx_args[0], msg,
- (error == -ENOMEM) ? PTL_NOSPACE : PTL_FAIL);
- }
- case KTX_GETTING: /* optimized GET sent & REPLY received */
- /* Complete the GET with success since we can't avoid
- * delivering a REPLY event; we committed to it when we
- * launched the GET */
- lib_finalize (&kqswnal_lib, NULL,
- (lib_msg_t *)ktx->ktx_args[1], PTL_OK);
- lib_finalize (&kqswnal_lib, NULL,
- (lib_msg_t *)ktx->ktx_args[2],
- (error == 0) ? PTL_OK : PTL_FAIL);
-- break;
--
-- default:
-- LASSERT (0);
-- }
--
-- kqswnal_put_idle_tx (ktx);
--}
--
--static void
--kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
--{
-- kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg;
--
-- LASSERT (txd != NULL);
-- LASSERT (ktx != NULL);
--
-- CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status);
--
-- if (status != EP_SUCCESS) {
--
-- CERROR ("Tx completion to "LPX64" failed: %d\n",
-- ktx->ktx_nid, status);
--
-- kqswnal_notify_peer_down(ktx);
-- status = -EHOSTDOWN;
--
- } else if (ktx->ktx_state == KTX_GETTING) {
- /* RPC completed OK; what did our peer put in the status
- } else switch (ktx->ktx_state) {
-
- case KTX_GETTING:
- case KTX_PUTTING:
- /* RPC completed OK; but what did our peer put in the status
-- * block? */
--#if MULTIRAIL_EKC
-- status = ep_txd_statusblk(txd)->Data[0];
--#else
-- status = ep_txd_statusblk(txd)->Status;
--#endif
- } else {
- break;
-
- case KTX_FORWARDING:
- case KTX_SENDING:
-- status = 0;
- break;
-
- default:
- LBUG();
- break;
-- }
--
-- kqswnal_tx_done (ktx, status);
--}
--
--int
--kqswnal_launch (kqswnal_tx_t *ktx)
--{
-- /* Don't block for transmit descriptor if we're in interrupt context */
-- int attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
-- int dest = kqswnal_nid2elanid (ktx->ktx_nid);
-- unsigned long flags;
-- int rc;
--
-- ktx->ktx_launchtime = jiffies;
-
- if (kqswnal_data.kqn_shuttingdown)
- return (-ESHUTDOWN);
--
-- LASSERT (dest >= 0); /* must be a peer */
- if (ktx->ktx_state == KTX_GETTING) {
- /* NB ktx_frag[0] is the GET hdr + kqswnal_remotemd_t. The
- * other frags are the GET sink which we obviously don't
- * send here :) */
-
--#if MULTIRAIL_EKC
- if (ktx->ktx_nmappedpages != 0)
- attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail);
-#endif
-
- switch (ktx->ktx_state) {
- case KTX_GETTING:
- case KTX_PUTTING:
- /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
- * The other frags are the payload, awaiting RDMA */
-- rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
-- ktx->ktx_port, attr,
-- kqswnal_txhandler, ktx,
-- NULL, ktx->ktx_frags, 1);
- #else
- rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
- ktx->ktx_port, attr, kqswnal_txhandler,
- ktx, NULL, ktx->ktx_frags, 1);
- #endif
- } else {
- break;
-
- case KTX_FORWARDING:
- case KTX_SENDING:
--#if MULTIRAIL_EKC
-- rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
-- ktx->ktx_port, attr,
-- kqswnal_txhandler, ktx,
-- NULL, ktx->ktx_frags, ktx->ktx_nfrag);
--#else
-- rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest,
-- ktx->ktx_port, attr,
-- kqswnal_txhandler, ktx,
-- ktx->ktx_frags, ktx->ktx_nfrag);
--#endif
- break;
-
- default:
- LBUG();
- rc = -EINVAL; /* no compiler warning please */
- break;
-- }
--
-- switch (rc) {
-- case EP_SUCCESS: /* success */
-- return (0);
--
-- case EP_ENOMEM: /* can't allocate ep txd => queue for later */
- LASSERT (in_interrupt());
-
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
--
-- list_add_tail (&ktx->ktx_delayed_list, &kqswnal_data.kqn_delayedtxds);
-- wake_up (&kqswnal_data.kqn_sched_waitq);
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-- return (0);
--
-- default: /* fatal error */
-- CERROR ("Tx to "LPX64" failed: %d\n", ktx->ktx_nid, rc);
-- kqswnal_notify_peer_down(ktx);
-- return (-EHOSTUNREACH);
-- }
--}
--
-#if 0
--static char *
--hdr_type_string (ptl_hdr_t *hdr)
--{
-- switch (hdr->type) {
-- case PTL_MSG_ACK:
-- return ("ACK");
-- case PTL_MSG_PUT:
-- return ("PUT");
-- case PTL_MSG_GET:
-- return ("GET");
-- case PTL_MSG_REPLY:
-- return ("REPLY");
-- default:
-- return ("<UNKNOWN>");
-- }
--}
--
--static void
--kqswnal_cerror_hdr(ptl_hdr_t * hdr)
--{
-- char *type_str = hdr_type_string (hdr);
--
-- CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str,
- NTOH__u32(hdr->payload_length));
- CERROR(" From nid/pid "LPU64"/%u\n", NTOH__u64(hdr->src_nid),
- NTOH__u32(hdr->src_pid));
- CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid),
- NTOH__u32(hdr->dest_pid));
- le32_to_cpu(hdr->payload_length));
- CERROR(" From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid),
- le32_to_cpu(hdr->src_pid));
- CERROR(" To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid),
- le32_to_cpu(hdr->dest_pid));
--
- switch (NTOH__u32(hdr->type)) {
- switch (le32_to_cpu(hdr->type)) {
-- case PTL_MSG_PUT:
-- CERROR(" Ptl index %d, ack md "LPX64"."LPX64", "
-- "match bits "LPX64"\n",
- NTOH__u32 (hdr->msg.put.ptl_index),
- le32_to_cpu(hdr->msg.put.ptl_index),
-- hdr->msg.put.ack_wmd.wh_interface_cookie,
-- hdr->msg.put.ack_wmd.wh_object_cookie,
- NTOH__u64 (hdr->msg.put.match_bits));
- le64_to_cpu(hdr->msg.put.match_bits));
-- CERROR(" offset %d, hdr data "LPX64"\n",
- NTOH__u32(hdr->msg.put.offset),
- le32_to_cpu(hdr->msg.put.offset),
-- hdr->msg.put.hdr_data);
-- break;
--
-- case PTL_MSG_GET:
-- CERROR(" Ptl index %d, return md "LPX64"."LPX64", "
-- "match bits "LPX64"\n",
- NTOH__u32 (hdr->msg.get.ptl_index),
- le32_to_cpu(hdr->msg.get.ptl_index),
-- hdr->msg.get.return_wmd.wh_interface_cookie,
-- hdr->msg.get.return_wmd.wh_object_cookie,
-- hdr->msg.get.match_bits);
-- CERROR(" Length %d, src offset %d\n",
- NTOH__u32 (hdr->msg.get.sink_length),
- NTOH__u32 (hdr->msg.get.src_offset));
- le32_to_cpu(hdr->msg.get.sink_length),
- le32_to_cpu(hdr->msg.get.src_offset));
-- break;
--
-- case PTL_MSG_ACK:
-- CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n",
-- hdr->msg.ack.dst_wmd.wh_interface_cookie,
-- hdr->msg.ack.dst_wmd.wh_object_cookie,
- NTOH__u32 (hdr->msg.ack.mlength));
- le32_to_cpu(hdr->msg.ack.mlength));
-- break;
--
-- case PTL_MSG_REPLY:
-- CERROR(" dst md "LPX64"."LPX64"\n",
-- hdr->msg.reply.dst_wmd.wh_interface_cookie,
-- hdr->msg.reply.dst_wmd.wh_object_cookie);
-- }
--
--} /* end of print_hdr() */
-#endif
--
--#if !MULTIRAIL_EKC
--void
--kqswnal_print_eiov (int how, char *str, int n, EP_IOVEC *iov)
--{
-- int i;
--
-- CDEBUG (how, "%s: %d\n", str, n);
-- for (i = 0; i < n; i++) {
-- CDEBUG (how, " %08x for %d\n", iov[i].Base, iov[i].Len);
-- }
--}
--
--int
--kqswnal_eiovs2datav (int ndv, EP_DATAVEC *dv,
-- int nsrc, EP_IOVEC *src,
-- int ndst, EP_IOVEC *dst)
--{
-- int count;
-- int nob;
--
-- LASSERT (ndv > 0);
-- LASSERT (nsrc > 0);
-- LASSERT (ndst > 0);
--
-- for (count = 0; count < ndv; count++, dv++) {
--
-- if (nsrc == 0 || ndst == 0) {
-- if (nsrc != ndst) {
-- /* For now I'll barf on any left over entries */
-- CERROR ("mismatched src and dst iovs\n");
-- return (-EINVAL);
-- }
-- return (count);
-- }
--
-- nob = (src->Len < dst->Len) ? src->Len : dst->Len;
-- dv->Len = nob;
-- dv->Source = src->Base;
-- dv->Dest = dst->Base;
--
-- if (nob >= src->Len) {
-- src++;
-- nsrc--;
-- } else {
-- src->Len -= nob;
-- src->Base += nob;
-- }
--
-- if (nob >= dst->Len) {
-- dst++;
-- ndst--;
-- } else {
-- src->Len -= nob;
-- src->Base += nob;
-- }
-- }
--
-- CERROR ("DATAVEC too small\n");
-- return (-E2BIG);
-}
-#else
-int
-kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
- int nrfrag, EP_NMD *rfrag)
-{
- int i;
-
- if (nlfrag != nrfrag) {
- CERROR("Can't cope with unequal # frags: %d local %d remote\n",
- nlfrag, nrfrag);
- return (-EINVAL);
- }
-
- for (i = 0; i < nlfrag; i++)
- if (lfrag[i].nmd_len != rfrag[i].nmd_len) {
- CERROR("Can't cope with unequal frags %d(%d):"
- " %d local %d remote\n",
- i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len);
- return (-EINVAL);
- }
-
- return (0);
--}
--#endif
--
- int
- kqswnal_dma_reply (kqswnal_tx_t *ktx, int nfrag,
- struct iovec *iov, ptl_kiov_t *kiov,
- int offset, int nob)
-kqswnal_remotemd_t *
-kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid)
--{
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-- char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
- ptl_hdr_t *hdr = (ptl_hdr_t *)buffer;
-- kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
- int rc;
- #if MULTIRAIL_EKC
- int i;
- #else
- EP_DATAVEC datav[EP_MAXFRAG];
- int ndatav;
- #endif
- LASSERT (krx->krx_rpc_reply_needed);
- LASSERT ((iov == NULL) != (kiov == NULL));
- ptl_nid_t nid = kqswnal_rx_nid(krx);
--
- /* see kqswnal_sendmsg comment regarding endian-ness */
- /* Note (1) lib_parse has already flipped hdr.
- * (2) RDMA addresses are sent in native endian-ness. When
- * EKC copes with different endian nodes, I'll fix this (and
- * eat my hat :) */
-
- LASSERT (krx->krx_nob >= sizeof(*hdr));
-
- if (hdr->type != type) {
- CERROR ("Unexpected optimized get/put type %d (%d expected)"
- "from "LPX64"\n", hdr->type, type, nid);
- return (NULL);
- }
-
- if (hdr->src_nid != nid) {
- CERROR ("Unexpected optimized get/put source NID "
- LPX64" from "LPX64"\n", hdr->src_nid, nid);
- return (NULL);
- }
-
- LASSERT (nid == expected_nid);
-
-- if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
-- /* msg too small to discover rmd size */
-- CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
-- krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
- return (-EINVAL);
- return (NULL);
-- }
-
-
-- if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
-- /* rmd doesn't fit in the incoming message */
-- CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
-- krx->krx_nob, rmd->kqrmd_nfrag,
-- (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
- return (-EINVAL);
- return (NULL);
-- }
--
- /* Map the source data... */
- return (rmd);
-}
-
-void
-kqswnal_rdma_store_complete (EP_RXD *rxd)
-{
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
- LASSERT (ktx->ktx_state == KTX_RDMAING);
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
-
- krx->krx_rpc_reply_needed = 0;
- kqswnal_rx_decref (krx);
-
- /* free ktx & finalize() its lib_msg_t */
- kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED);
-}
-
-void
-kqswnal_rdma_fetch_complete (EP_RXD *rxd)
-{
- /* Completed fetching the PUT data */
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- unsigned long flags;
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
- LASSERT (ktx->ktx_state == KTX_RDMAING);
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
-
- /* Set the RPC completion status */
- status = (status == EP_SUCCESS) ? 0 : -ECONNABORTED;
- krx->krx_rpc_reply_status = status;
-
- /* free ktx & finalize() its lib_msg_t */
- kqswnal_tx_done(ktx, status);
-
- if (!in_interrupt()) {
- /* OK to complete the RPC now (iff I had the last ref) */
- kqswnal_rx_decref (krx);
- return;
- }
-
- LASSERT (krx->krx_state == KRX_PARSE);
- krx->krx_state = KRX_COMPLETING;
-
- /* Complete the RPC in thread context */
- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
- list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
- wake_up (&kqswnal_data.kqn_sched_waitq);
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-}
-
-int
-kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type,
- int niov, struct iovec *iov, ptl_kiov_t *kiov,
- size_t offset, size_t len)
-{
- kqswnal_remotemd_t *rmd;
- kqswnal_tx_t *ktx;
- int eprc;
- int rc;
-#if !MULTIRAIL_EKC
- EP_DATAVEC datav[EP_MAXFRAG];
- int ndatav;
-#endif
-
- LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT);
- /* Not both mapped and paged payload */
- LASSERT (iov == NULL || kiov == NULL);
- /* RPC completes with failure by default */
- LASSERT (krx->krx_rpc_reply_needed);
- LASSERT (krx->krx_rpc_reply_status != 0);
-
- rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid);
- if (rmd == NULL)
- return (-EPROTO);
-
- if (len == 0) {
- /* data got truncated to nothing. */
- lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK);
- /* Let kqswnal_rx_done() complete the RPC with success */
- krx->krx_rpc_reply_status = 0;
- return (0);
- }
-
- /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
- actually sending a portals message with it */
- ktx = kqswnal_get_idle_tx(NULL, 0);
- if (ktx == NULL) {
- CERROR ("Can't get txd for RDMA with "LPX64"\n",
- libmsg->ev.initiator.nid);
- return (-ENOMEM);
- }
-
- ktx->ktx_state = KTX_RDMAING;
- ktx->ktx_nid = libmsg->ev.initiator.nid;
- ktx->ktx_args[0] = krx;
- ktx->ktx_args[1] = libmsg;
-
-#if MULTIRAIL_EKC
- /* Map on the rail the RPC prefers */
- ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx,
- ep_rxd_railmask(krx->krx_rxd));
-#endif
-
- /* Start mapping at offset 0 (we're not mapping any headers) */
-- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
-
-- if (kiov != NULL)
- rc = kqswnal_map_tx_kiov (ktx, offset, nob, nfrag, kiov);
- rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
-- else
- rc = kqswnal_map_tx_iov (ktx, offset, nob, nfrag, iov);
- rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
--
-- if (rc != 0) {
- CERROR ("Can't map source data: %d\n", rc);
- return (rc);
- CERROR ("Can't map local RDMA data: %d\n", rc);
- goto out;
-- }
--
--#if MULTIRAIL_EKC
- if (ktx->ktx_nfrag != rmd->kqrmd_nfrag) {
- CERROR("Can't cope with unequal # frags: %d local %d remote\n",
- ktx->ktx_nfrag, rmd->kqrmd_nfrag);
- return (-EINVAL);
- rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag);
- if (rc != 0) {
- CERROR ("Incompatible RDMA descriptors\n");
- goto out;
-- }
-
- for (i = 0; i < rmd->kqrmd_nfrag; i++)
- if (ktx->ktx_frags[i].nmd_len != rmd->kqrmd_frag[i].nmd_len) {
- CERROR("Can't cope with unequal frags %d(%d):"
- " %d local %d remote\n",
- i, rmd->kqrmd_nfrag,
- ktx->ktx_frags[i].nmd_len,
- rmd->kqrmd_frag[i].nmd_len);
- return (-EINVAL);
- }
--#else
- ndatav = kqswnal_eiovs2datav (EP_MAXFRAG, datav,
- ktx->ktx_nfrag, ktx->ktx_frags,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag);
- switch (type) {
- default:
- LBUG();
-
- case PTL_MSG_GET:
- ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
- ktx->ktx_nfrag, ktx->ktx_frags,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag);
- break;
-
- case PTL_MSG_PUT:
- ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
- rmd->kqrmd_nfrag, rmd->kqrmd_frag,
- ktx->ktx_nfrag, ktx->ktx_frags);
- break;
- }
-
-- if (ndatav < 0) {
-- CERROR ("Can't create datavec: %d\n", ndatav);
- return (ndatav);
- rc = ndatav;
- goto out;
-- }
--#endif
--
- /* Our caller will start to race with kqswnal_dma_reply_complete... */
- LASSERT (atomic_read (&krx->krx_refcount) == 1);
- atomic_set (&krx->krx_refcount, 2);
- LASSERT (atomic_read(&krx->krx_refcount) > 0);
- /* Take an extra ref for the completion callback */
- atomic_inc(&krx->krx_refcount);
--
- #if MULTIRAIL_EKC
- rc = ep_complete_rpc(krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
- &kqswnal_rpc_success,
- ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
- if (rc == EP_SUCCESS)
- return (0);
- switch (type) {
- default:
- LBUG();
--
- /* Well we tried... */
- krx->krx_rpc_reply_needed = 0;
- case PTL_MSG_GET:
-#if MULTIRAIL_EKC
- eprc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rdma_store_complete, ktx,
- &kqswnal_data.kqn_rpc_success,
- ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
--#else
- rc = ep_complete_rpc (krx->krx_rxd, kqswnal_dma_reply_complete, ktx,
- &kqswnal_rpc_success, datav, ndatav);
- if (rc == EP_SUCCESS)
- return (0);
-
- /* "old" EKC destroys rxd on failed completion */
- krx->krx_rxd = NULL;
- eprc = ep_complete_rpc (krx->krx_rxd,
- kqswnal_rdma_store_complete, ktx,
- &kqswnal_data.kqn_rpc_success,
- datav, ndatav);
- if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */
- krx->krx_rxd = NULL;
--#endif
-
- CERROR("can't complete RPC: %d\n", rc);
- if (eprc != EP_SUCCESS) {
- CERROR("can't complete RPC: %d\n", eprc);
- /* don't re-attempt RPC completion */
- krx->krx_rpc_reply_needed = 0;
- rc = -ECONNABORTED;
- }
- break;
-
- case PTL_MSG_PUT:
-#if MULTIRAIL_EKC
- eprc = ep_rpc_get (krx->krx_rxd,
- kqswnal_rdma_fetch_complete, ktx,
- rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
-#else
- eprc = ep_rpc_get (krx->krx_rxd,
- kqswnal_rdma_fetch_complete, ktx,
- datav, ndatav);
-#endif
- if (eprc != EP_SUCCESS) {
- CERROR("ep_rpc_get failed: %d\n", eprc);
- rc = -ECONNABORTED;
- }
- break;
- }
--
- /* reset refcount back to 1: we're not going to be racing with
- * kqswnal_dma_reply_complete. */
- atomic_set (&krx->krx_refcount, 1);
- out:
- if (rc != 0) {
- kqswnal_rx_decref(krx); /* drop callback's ref */
- kqswnal_put_idle_tx (ktx);
- }
--
- return (-ECONNABORTED);
- atomic_dec(&kqswnal_data.kqn_pending_txs);
- return (rc);
--}
--
--static ptl_err_t
- kqswnal_sendmsg (nal_cb_t *nal,
-kqswnal_sendmsg (lib_nal_t *nal,
-- void *private,
-- lib_msg_t *libmsg,
-- ptl_hdr_t *hdr,
-- int type,
-- ptl_nid_t nid,
-- ptl_pid_t pid,
-- unsigned int payload_niov,
-- struct iovec *payload_iov,
-- ptl_kiov_t *payload_kiov,
-- size_t payload_offset,
-- size_t payload_nob)
--{
-- kqswnal_tx_t *ktx;
-- int rc;
-- ptl_nid_t targetnid;
--#if KQSW_CHECKSUM
-- int i;
-- kqsw_csum_t csum;
-- int sumoff;
-- int sumnob;
--#endif
- /* NB 1. hdr is in network byte order */
- /* 2. 'private' depends on the message type */
--
-- CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
-- " pid %u\n", payload_nob, payload_niov, nid, pid);
--
-- LASSERT (payload_nob == 0 || payload_niov > 0);
-- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
--
-- /* It must be OK to kmap() if required */
-- LASSERT (payload_kiov == NULL || !in_interrupt ());
-- /* payload is either all vaddrs or all pages */
-- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
-
-- if (payload_nob > KQSW_MAXPAYLOAD) {
-- CERROR ("request exceeds MTU size "LPSZ" (max %u).\n",
-- payload_nob, KQSW_MAXPAYLOAD);
-- return (PTL_FAIL);
- }
-
- if (type == PTL_MSG_REPLY && /* can I look in 'private' */
- ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
- /* Must be a REPLY for an optimized GET */
- rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET,
- payload_niov, payload_iov, payload_kiov,
- payload_offset, payload_nob);
- return ((rc == 0) ? PTL_OK : PTL_FAIL);
-- }
--
-- targetnid = nid;
-- if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? */
-- rc = kpr_lookup (&kqswnal_data.kqn_router, nid,
-- sizeof (ptl_hdr_t) + payload_nob, &targetnid);
-- if (rc != 0) {
-- CERROR("Can't route to "LPX64": router error %d\n",
-- nid, rc);
-- return (PTL_FAIL);
-- }
-- if (kqswnal_nid2elanid (targetnid) < 0) {
-- CERROR("Bad gateway "LPX64" for "LPX64"\n",
-- targetnid, nid);
-- return (PTL_FAIL);
-- }
-- }
--
-- /* I may not block for a transmit descriptor if I might block the
-- * receiver, or an interrupt handler. */
-- ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK ||
-- type == PTL_MSG_REPLY ||
-- in_interrupt()));
-- if (ktx == NULL) {
- kqswnal_cerror_hdr (hdr);
- return (PTL_NOSPACE);
- CERROR ("Can't get txd for msg type %d for "LPX64"\n",
- type, libmsg->ev.initiator.nid);
- return (PTL_NO_SPACE);
-- }
--
- ktx->ktx_state = KTX_SENDING;
-- ktx->ktx_nid = targetnid;
-- ktx->ktx_args[0] = private;
-- ktx->ktx_args[1] = libmsg;
-
- if (type == PTL_MSG_REPLY &&
- ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) {
- if (nid != targetnid ||
- kqswnal_nid2elanid(nid) !=
- ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd)) {
- CERROR("Optimized reply nid conflict: "
- "nid "LPX64" via "LPX64" elanID %d\n",
- nid, targetnid,
- ep_rxd_node(((kqswnal_rx_t *)private)->krx_rxd));
- return (PTL_FAIL);
- }
-
- /* peer expects RPC completion with GET data */
- rc = kqswnal_dma_reply (ktx, payload_niov,
- payload_iov, payload_kiov,
- payload_offset, payload_nob);
- if (rc == 0)
- return (PTL_OK);
-
- CERROR ("Can't DMA reply to "LPX64": %d\n", nid, rc);
- kqswnal_put_idle_tx (ktx);
- return (PTL_FAIL);
- }
- ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */
--
-- memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
- ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
--
--#if KQSW_CHECKSUM
-- csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr));
-- memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum));
-- for (csum = 0, i = 0, sumoff = payload_offset, sumnob = payload_nob; sumnob > 0; i++) {
-- LASSERT(i < niov);
-- if (payload_kiov != NULL) {
-- ptl_kiov_t *kiov = &payload_kiov[i];
--
-- if (sumoff >= kiov->kiov_len) {
-- sumoff -= kiov->kiov_len;
-- } else {
-- char *addr = ((char *)kmap (kiov->kiov_page)) +
-- kiov->kiov_offset + sumoff;
-- int fragnob = kiov->kiov_len - sumoff;
--
-- csum = kqsw_csum(csum, addr, MIN(sumnob, fragnob));
-- sumnob -= fragnob;
-- sumoff = 0;
-- kunmap(kiov->kiov_page);
-- }
-- } else {
-- struct iovec *iov = &payload_iov[i];
--
-- if (sumoff > iov->iov_len) {
-- sumoff -= iov->iov_len;
-- } else {
-- char *addr = iov->iov_base + sumoff;
-- int fragnob = iov->iov_len - sumoff;
--
-- csum = kqsw_csum(csum, addr, MIN(sumnob, fragnob));
-- sumnob -= fragnob;
-- sumoff = 0;
-- }
-- }
-- }
-- memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
--#endif
--
- if (kqswnal_data.kqn_optimized_gets &&
- type == PTL_MSG_GET && /* doing a GET */
- nid == targetnid) { /* not forwarding */
- /* The first frag will be the pre-mapped buffer for (at least) the
- * portals header. */
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-
- if (nid == targetnid && /* not forwarding */
- ((type == PTL_MSG_GET && /* optimize GET? */
- kqswnal_tunables.kqn_optimized_gets != 0 &&
- le32_to_cpu(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) ||
- (type == PTL_MSG_PUT && /* optimize PUT? */
- kqswnal_tunables.kqn_optimized_puts != 0 &&
- payload_nob >= kqswnal_tunables.kqn_optimized_puts))) {
-- lib_md_t *md = libmsg->md;
-- kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE);
--
- /* Optimised path: I send over the Elan vaddrs of the get
- * sink buffers, and my peer DMAs directly into them.
- /* Optimised path: I send over the Elan vaddrs of the local
- * buffers, and my peer DMAs directly to/from them.
-- *
-- * First I set up ktx as if it was going to send this
-- * payload, (it needs to map it anyway). This fills
-- * ktx_frags[1] and onward with the network addresses
-- * of the GET sink frags. I copy these into ktx_buffer,
- * immediately after the header, and send that as my GET
- * message.
- *
- * Note that the addresses are sent in native endian-ness.
- * When EKC copes with different endian nodes, I'll fix
- * this (and eat my hat :) */
- * immediately after the header, and send that as my
- * message. */
--
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_GETTING;
- ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING;
--
-- if ((libmsg->md->options & PTL_MD_KIOV) != 0)
-- rc = kqswnal_map_tx_kiov (ktx, 0, md->length,
-- md->md_niov, md->md_iov.kiov);
-- else
-- rc = kqswnal_map_tx_iov (ktx, 0, md->length,
-- md->md_niov, md->md_iov.iov);
-
- if (rc < 0) {
- kqswnal_put_idle_tx (ktx);
- return (PTL_FAIL);
- }
- if (rc != 0)
- goto out;
--
-- rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1;
--
-- payload_nob = offsetof(kqswnal_remotemd_t,
-- kqrmd_frag[rmd->kqrmd_nfrag]);
-- LASSERT (KQSW_HDR_SIZE + payload_nob <= KQSW_TX_BUFFER_SIZE);
--
--#if MULTIRAIL_EKC
-- memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1],
-- rmd->kqrmd_nfrag * sizeof(EP_NMD));
--
-- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-- 0, KQSW_HDR_SIZE + payload_nob);
--#else
-- memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1],
-- rmd->kqrmd_nfrag * sizeof(EP_IOVEC));
--
-- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
--#endif
- if (type == PTL_MSG_GET) {
- /* Allocate reply message now while I'm in thread context */
- ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib,
- nid, libmsg);
- if (ktx->ktx_args[2] == NULL)
- goto out;
-
- /* NB finalizing the REPLY message is my
- * responsibility now, whatever happens. */
- }
-
-- } else if (payload_nob <= KQSW_TX_MAXCONTIG) {
--
-- /* small message: single frag copied into the pre-mapped buffer */
--
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_SENDING;
--#if MULTIRAIL_EKC
-- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-- 0, KQSW_HDR_SIZE + payload_nob);
--#else
-- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
--#endif
-- if (payload_nob > 0) {
-- if (payload_kiov != NULL)
-- lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
-- payload_niov, payload_kiov,
-- payload_offset, payload_nob);
-- else
-- lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
-- payload_niov, payload_iov,
-- payload_offset, payload_nob);
-- }
-- } else {
--
-- /* large message: multiple frags: first is hdr in pre-mapped buffer */
--
- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- ktx->ktx_state = KTX_SENDING;
--#if MULTIRAIL_EKC
-- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-- 0, KQSW_HDR_SIZE);
--#else
-- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
--#endif
-- if (payload_kiov != NULL)
-- rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob,
-- payload_niov, payload_kiov);
-- else
-- rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob,
-- payload_niov, payload_iov);
- if (rc != 0) {
- kqswnal_put_idle_tx (ktx);
- return (PTL_FAIL);
- }
- if (rc != 0)
- goto out;
-- }
--
-- ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ?
-- EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
--
-- rc = kqswnal_launch (ktx);
- if (rc != 0) { /* failed? */
- CERROR ("Failed to send packet to "LPX64": %d\n", targetnid, rc);
-
- out:
- CDEBUG(rc == 0 ? D_NET : D_ERROR,
- "%s "LPSZ" bytes to "LPX64" via "LPX64": rc %d\n",
- rc == 0 ? "Sent" : "Failed to send",
- payload_nob, nid, targetnid, rc);
-
- if (rc != 0) {
- if (ktx->ktx_state == KTX_GETTING &&
- ktx->ktx_args[2] != NULL) {
- /* We committed to reply, but there was a problem
- * launching the GET. We can't avoid delivering a
- * REPLY event since we committed above, so we
- * pretend the GET succeeded but the REPLY
- * failed. */
- rc = 0;
- lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK);
- lib_finalize (&kqswnal_lib, private,
- (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL);
- }
-
-- kqswnal_put_idle_tx (ktx);
- return (PTL_FAIL);
-- }
-
- CDEBUG(D_NET, "sent "LPSZ" bytes to "LPX64" via "LPX64"\n",
- payload_nob, nid, targetnid);
- return (PTL_OK);
-
- atomic_dec(&kqswnal_data.kqn_pending_txs);
- return (rc == 0 ? PTL_OK : PTL_FAIL);
--}
--
--static ptl_err_t
- kqswnal_send (nal_cb_t *nal,
-kqswnal_send (lib_nal_t *nal,
-- void *private,
-- lib_msg_t *libmsg,
-- ptl_hdr_t *hdr,
-- int type,
-- ptl_nid_t nid,
-- ptl_pid_t pid,
-- unsigned int payload_niov,
-- struct iovec *payload_iov,
-- size_t payload_offset,
-- size_t payload_nob)
--{
-- return (kqswnal_sendmsg (nal, private, libmsg, hdr, type, nid, pid,
-- payload_niov, payload_iov, NULL,
-- payload_offset, payload_nob));
--}
--
--static ptl_err_t
- kqswnal_send_pages (nal_cb_t *nal,
-kqswnal_send_pages (lib_nal_t *nal,
-- void *private,
-- lib_msg_t *libmsg,
-- ptl_hdr_t *hdr,
-- int type,
-- ptl_nid_t nid,
-- ptl_pid_t pid,
-- unsigned int payload_niov,
-- ptl_kiov_t *payload_kiov,
-- size_t payload_offset,
-- size_t payload_nob)
--{
-- return (kqswnal_sendmsg (nal, private, libmsg, hdr, type, nid, pid,
-- payload_niov, NULL, payload_kiov,
-- payload_offset, payload_nob));
--}
--
--void
--kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
--{
-- int rc;
-- kqswnal_tx_t *ktx;
-- ptl_kiov_t *kiov = fwd->kprfd_kiov;
-- int niov = fwd->kprfd_niov;
-- int nob = fwd->kprfd_nob;
-- ptl_nid_t nid = fwd->kprfd_gateway_nid;
--
--#if KQSW_CHECKSUM
-- CERROR ("checksums for forwarded packets not implemented\n");
-- LBUG ();
--#endif
-- /* The router wants this NAL to forward a packet */
-- CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n",
-- fwd, nid, niov, nob);
--
-- ktx = kqswnal_get_idle_tx (fwd, 0);
-- if (ktx == NULL) /* can't get txd right now */
-- return; /* fwd will be scheduled when tx desc freed */
--
- if (nid == kqswnal_lib.ni.nid) /* gateway is me */
- if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */
-- nid = fwd->kprfd_target_nid; /* target is final dest */
--
-- if (kqswnal_nid2elanid (nid) < 0) {
-- CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid);
-- rc = -EHOSTUNREACH;
- goto failed;
- goto out;
-- }
--
-- /* copy hdr into pre-mapped buffer */
-- memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t));
- ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer;
--
-- ktx->ktx_port = (nob <= KQSW_SMALLPAYLOAD) ?
-- EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
-- ktx->ktx_nid = nid;
-- ktx->ktx_state = KTX_FORWARDING;
-- ktx->ktx_args[0] = fwd;
-- ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
--
-- if (nob <= KQSW_TX_MAXCONTIG)
-- {
-- /* send payload from ktx's pre-mapped contiguous buffer */
--#if MULTIRAIL_EKC
-- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-- 0, KQSW_HDR_SIZE + nob);
--#else
-- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob;
--#endif
-- if (nob > 0)
-- lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE,
-- niov, kiov, 0, nob);
-- }
-- else
-- {
-- /* zero copy payload */
--#if MULTIRAIL_EKC
-- ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-- 0, KQSW_HDR_SIZE);
--#else
-- ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-- ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
--#endif
-- rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov);
-- if (rc != 0)
- goto failed;
- goto out;
-- }
--
-- rc = kqswnal_launch (ktx);
- if (rc == 0)
- return;
- out:
- if (rc != 0) {
- CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
--
- failed:
- LASSERT (rc != 0);
- CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
- /* complete now (with failure) */
- kqswnal_tx_done (ktx, rc);
- }
--
- kqswnal_put_idle_tx (ktx);
- /* complete now (with failure) */
- kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc);
- atomic_dec(&kqswnal_data.kqn_pending_txs);
--}
--
--void
--kqswnal_fwd_callback (void *arg, int error)
--{
-- kqswnal_rx_t *krx = (kqswnal_rx_t *)arg;
--
-- /* The router has finished forwarding this packet */
--
-- if (error != 0)
-- {
-- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
--
-- CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
- NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error);
- le64_to_cpu(hdr->src_nid), le64_to_cpu(hdr->dest_nid),error);
-- }
--
- kqswnal_requeue_rx (krx);
- LASSERT (atomic_read(&krx->krx_refcount) == 1);
- kqswnal_rx_decref (krx);
--}
--
--void
- kqswnal_dma_reply_complete (EP_RXD *rxd)
-kqswnal_requeue_rx (kqswnal_rx_t *krx)
--{
- int status = ep_rxd_status(rxd);
- kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
- kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
- lib_msg_t *msg = (lib_msg_t *)ktx->ktx_args[1];
-
- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
- "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
- LASSERT (atomic_read(&krx->krx_refcount) == 0);
- LASSERT (!krx->krx_rpc_reply_needed);
--
- LASSERT (krx->krx_rxd == rxd);
- LASSERT (krx->krx_rpc_reply_needed);
- krx->krx_state = KRX_POSTED;
--
- krx->krx_rpc_reply_needed = 0;
- kqswnal_rx_done (krx);
-#if MULTIRAIL_EKC
- if (kqswnal_data.kqn_shuttingdown) {
- /* free EKC rxd on shutdown */
- ep_complete_receive(krx->krx_rxd);
- } else {
- /* repost receive */
- ep_requeue_receive(krx->krx_rxd,
- kqswnal_rxhandler, krx,
- &krx->krx_elanbuffer, 0);
- }
-#else
- if (kqswnal_data.kqn_shuttingdown)
- return;
--
- lib_finalize (&kqswnal_lib, NULL, msg,
- (status == EP_SUCCESS) ? PTL_OK : PTL_FAIL);
- kqswnal_put_idle_tx (ktx);
- if (krx->krx_rxd == NULL) {
- /* We had a failed ep_complete_rpc() which nukes the
- * descriptor in "old" EKC */
- int eprc = ep_queue_receive(krx->krx_eprx,
- kqswnal_rxhandler, krx,
- krx->krx_elanbuffer,
- krx->krx_npages * PAGE_SIZE, 0);
- LASSERT (eprc == EP_SUCCESS);
- /* We don't handle failure here; it's incredibly rare
- * (never reported?) and only happens with "old" EKC */
- } else {
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer,
- krx->krx_npages * PAGE_SIZE);
- }
-#endif
--}
--
--void
--kqswnal_rpc_complete (EP_RXD *rxd)
--{
-- int status = ep_rxd_status(rxd);
-- kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg(rxd);
--
-- CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-- "rxd %p, krx %p, status %d\n", rxd, krx, status);
--
-- LASSERT (krx->krx_rxd == rxd);
-- LASSERT (krx->krx_rpc_reply_needed);
--
-- krx->krx_rpc_reply_needed = 0;
-- kqswnal_requeue_rx (krx);
--}
--
--void
- kqswnal_requeue_rx (kqswnal_rx_t *krx)
-kqswnal_rx_done (kqswnal_rx_t *krx)
--{
- int rc;
- int rc;
- EP_STATUSBLK *sblk;
--
-- LASSERT (atomic_read(&krx->krx_refcount) == 0);
--
-- if (krx->krx_rpc_reply_needed) {
- /* We've not completed the peer's RPC yet... */
- sblk = (krx->krx_rpc_reply_status == 0) ?
- &kqswnal_data.kqn_rpc_success :
- &kqswnal_data.kqn_rpc_failed;
--
- /* We failed to complete the peer's optimized GET (e.g. we
- * couldn't map the source buffers). We complete the
- * peer's EKC rpc now with failure. */
- LASSERT (!in_interrupt());
--#if MULTIRAIL_EKC
- rc = ep_complete_rpc(krx->krx_rxd, kqswnal_rpc_complete, krx,
- &kqswnal_rpc_failed, NULL, NULL, 0);
- rc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rpc_complete, krx,
- sblk, NULL, NULL, 0);
-- if (rc == EP_SUCCESS)
-- return;
-
- CERROR("can't complete RPC: %d\n", rc);
--#else
- if (krx->krx_rxd != NULL) {
- /* We didn't try (and fail) to complete earlier... */
- rc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rpc_complete, krx,
- &kqswnal_rpc_failed, NULL, 0);
- if (rc == EP_SUCCESS)
- return;
-
- CERROR("can't complete RPC: %d\n", rc);
- }
-
- /* NB the old ep_complete_rpc() frees rxd on failure, so we
- * have to requeue from scratch here, unless we're shutting
- * down */
- if (kqswnal_data.kqn_shuttingdown)
- rc = ep_complete_rpc(krx->krx_rxd,
- kqswnal_rpc_complete, krx,
- sblk, NULL, 0);
- if (rc == EP_SUCCESS)
-- return;
--
- rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer,
- krx->krx_npages * PAGE_SIZE, 0);
- LASSERT (rc == EP_SUCCESS);
- /* We don't handle failure here; it's incredibly rare
- * (never reported?) and only happens with "old" EKC */
- return;
- /* "old" EKC destroys rxd on failed completion */
- krx->krx_rxd = NULL;
--#endif
- CERROR("can't complete RPC: %d\n", rc);
- krx->krx_rpc_reply_needed = 0;
-- }
--
- #if MULTIRAIL_EKC
- if (kqswnal_data.kqn_shuttingdown) {
- /* free EKC rxd on shutdown */
- ep_complete_receive(krx->krx_rxd);
- } else {
- /* repost receive */
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- &krx->krx_elanbuffer, 0);
- }
- #else
- /* don't actually requeue on shutdown */
- if (!kqswnal_data.kqn_shuttingdown)
- ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
- krx->krx_elanbuffer, krx->krx_npages * PAGE_SIZE);
- #endif
- kqswnal_requeue_rx(krx);
--}
--
--void
- kqswnal_rx (kqswnal_rx_t *krx)
-kqswnal_parse (kqswnal_rx_t *krx)
--{
-- ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
- ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid);
- ptl_nid_t dest_nid = le64_to_cpu(hdr->dest_nid);
-- int payload_nob;
-- int nob;
-- int niov;
--
- LASSERT (atomic_read(&krx->krx_refcount) == 0);
- LASSERT (atomic_read(&krx->krx_refcount) == 1);
--
- if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */
- atomic_set(&krx->krx_refcount, 1);
- lib_parse (&kqswnal_lib, hdr, krx);
- kqswnal_rx_done(krx);
- if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */
- /* I ignore parse errors since I'm not consuming a byte
- * stream */
- (void)lib_parse (&kqswnal_lib, hdr, krx);
-
- /* Drop my ref; any RDMA activity takes an additional ref */
- kqswnal_rx_decref(krx);
-- return;
-- }
--
--#if KQSW_CHECKSUM
- CERROR ("checksums for forwarded packets not implemented\n");
- LBUG ();
- LASSERTF (0, "checksums for forwarded packets not implemented\n");
--#endif
-
-- if (kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */
-- {
-- CERROR("dropping packet from "LPX64" for "LPX64
- ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid);
- ": target is peer\n", le64_to_cpu(hdr->src_nid), dest_nid);
--
- kqswnal_requeue_rx (krx);
- kqswnal_rx_decref (krx);
-- return;
-- }
--
-- nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE;
-- niov = 0;
-- if (nob > 0) {
-- krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE;
-- krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob);
-- niov = 1;
-- nob -= PAGE_SIZE - KQSW_HDR_SIZE;
--
-- while (nob > 0) {
-- LASSERT (niov < krx->krx_npages);
--
-- krx->krx_kiov[niov].kiov_offset = 0;
-- krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob);
-- niov++;
-- nob -= PAGE_SIZE;
-- }
-- }
--
-- kpr_fwd_init (&krx->krx_fwd, dest_nid,
-- hdr, payload_nob, niov, krx->krx_kiov,
-- kqswnal_fwd_callback, krx);
--
-- kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
--}
--
--/* Receive Interrupt Handler: posts to schedulers */
--void
--kqswnal_rxhandler(EP_RXD *rxd)
--{
-- unsigned long flags;
-- int nob = ep_rxd_len (rxd);
-- int status = ep_rxd_status (rxd);
-- kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd);
--
-- CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n",
-- rxd, krx, nob, status);
--
-- LASSERT (krx != NULL);
-
- LASSERT (krx->krx_state = KRX_POSTED);
-
- krx->krx_state = KRX_PARSE;
-- krx->krx_rxd = rxd;
-- krx->krx_nob = nob;
--#if MULTIRAIL_EKC
-- krx->krx_rpc_reply_needed = (status != EP_SHUTDOWN) && ep_rxd_isrpc(rxd);
--#else
-- krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd);
--#endif
-
- /* Default to failure if an RPC reply is requested but not handled */
- krx->krx_rpc_reply_status = -EPROTO;
- atomic_set (&krx->krx_refcount, 1);
-
-- /* must receive a whole header to be able to parse */
-- if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t))
-- {
-- /* receives complete with failure when receiver is removed */
--#if MULTIRAIL_EKC
-- if (status == EP_SHUTDOWN)
-- LASSERT (kqswnal_data.kqn_shuttingdown);
-- else
-- CERROR("receive status failed with status %d nob %d\n",
-- ep_rxd_status(rxd), nob);
--#else
-- if (!kqswnal_data.kqn_shuttingdown)
-- CERROR("receive status failed with status %d nob %d\n",
-- ep_rxd_status(rxd), nob);
--#endif
- kqswnal_requeue_rx (krx);
- kqswnal_rx_decref(krx);
-- return;
-- }
--
-- if (!in_interrupt()) {
- kqswnal_rx (krx);
- kqswnal_parse(krx);
-- return;
-- }
--
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
--
-- list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
-- wake_up (&kqswnal_data.kqn_sched_waitq);
--
-- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
--}
--
--#if KQSW_CHECKSUM
--void
--kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
--{
-- ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
--
-- CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
-- ", dpid %d, spid %d, type %d\n",
-- ishdr ? "Header" : "Payload", krx,
- NTOH__u64(hdr->dest_nid), NTOH__u64(hdr->src_nid)
- NTOH__u32(hdr->dest_pid), NTOH__u32(hdr->src_pid),
- NTOH__u32(hdr->type));
- le64_to_cpu(hdr->dest_nid), le64_to_cpu(hdr->src_nid)
- le32_to_cpu(hdr->dest_pid), le32_to_cpu(hdr->src_pid),
- le32_to_cpu(hdr->type));
--
- switch (NTOH__u32 (hdr->type))
- switch (le32_to_cpu(hdr->type))
-- {
-- case PTL_MSG_ACK:
-- CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64
-- " len %u\n",
- NTOH__u32(hdr->msg.ack.mlength),
- le32_to_cpu(hdr->msg.ack.mlength),
-- hdr->msg.ack.dst_wmd.handle_cookie,
-- hdr->msg.ack.dst_wmd.handle_idx,
- NTOH__u64(hdr->msg.ack.match_bits),
- NTOH__u32(hdr->msg.ack.length));
- le64_to_cpu(hdr->msg.ack.match_bits),
- le32_to_cpu(hdr->msg.ack.length));
-- break;
-- case PTL_MSG_PUT:
-- CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64
-- " len %u off %u data "LPX64"\n",
- NTOH__u32(hdr->msg.put.ptl_index),
- le32_to_cpu(hdr->msg.put.ptl_index),
-- hdr->msg.put.ack_wmd.handle_cookie,
-- hdr->msg.put.ack_wmd.handle_idx,
- NTOH__u64(hdr->msg.put.match_bits),
- NTOH__u32(hdr->msg.put.length),
- NTOH__u32(hdr->msg.put.offset),
- le64_to_cpu(hdr->msg.put.match_bits),
- le32_to_cpu(hdr->msg.put.length),
- le32_to_cpu(hdr->msg.put.offset),
-- hdr->msg.put.hdr_data);
-- break;
-- case PTL_MSG_GET:
-- CERROR ("GET: <>\n");
-- break;
-- case PTL_MSG_REPLY:
-- CERROR ("REPLY: <>\n");
-- break;
-- default:
-- CERROR ("TYPE?: <>\n");
-- }
--}
--#endif
--
--static ptl_err_t
- kqswnal_recvmsg (nal_cb_t *nal,
-kqswnal_recvmsg (lib_nal_t *nal,
-- void *private,
-- lib_msg_t *libmsg,
-- unsigned int niov,
-- struct iovec *iov,
-- ptl_kiov_t *kiov,
-- size_t offset,
-- size_t mlen,
-- size_t rlen)
--{
-- kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
-- char *buffer = page_address(krx->krx_kiov[0].kiov_page);
- ptl_hdr_t *hdr = (ptl_hdr_t *)buffer;
-- int page;
-- char *page_ptr;
-- int page_nob;
-- char *iov_ptr;
-- int iov_nob;
-- int frag;
- int rc;
--#if KQSW_CHECKSUM
-- kqsw_csum_t senders_csum;
-- kqsw_csum_t payload_csum = 0;
- kqsw_csum_t hdr_csum = kqsw_csum(0, buffer, sizeof(ptl_hdr_t));
- kqsw_csum_t hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr));
-- size_t csum_len = mlen;
-- int csum_frags = 0;
-- int csum_nob = 0;
-- static atomic_t csum_counter;
-- int csum_verbose = (atomic_read(&csum_counter)%1000001) == 0;
--
-- atomic_inc (&csum_counter);
--
-- memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
-- if (senders_csum != hdr_csum)
-- kqswnal_csum_error (krx, 1);
--#endif
- /* NB lib_parse() has already flipped *hdr */
-
-- CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
-
- if (krx->krx_rpc_reply_needed &&
- hdr->type == PTL_MSG_PUT) {
- /* This must be an optimized PUT */
- rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT,
- niov, iov, kiov, offset, mlen);
- return (rc == 0 ? PTL_OK : PTL_FAIL);
- }
--
-- /* What was actually received must be >= payload. */
-- LASSERT (mlen <= rlen);
-- if (krx->krx_nob < KQSW_HDR_SIZE + mlen) {
-- CERROR("Bad message size: have %d, need %d + %d\n",
-- krx->krx_nob, (int)KQSW_HDR_SIZE, (int)mlen);
-- return (PTL_FAIL);
-- }
--
-- /* It must be OK to kmap() if required */
-- LASSERT (kiov == NULL || !in_interrupt ());
-- /* Either all pages or all vaddrs */
-- LASSERT (!(kiov != NULL && iov != NULL));
--
-- if (mlen != 0) {
-- page = 0;
-- page_ptr = buffer + KQSW_HDR_SIZE;
-- page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
--
-- LASSERT (niov > 0);
--
-- if (kiov != NULL) {
-- /* skip complete frags */
-- while (offset >= kiov->kiov_len) {
-- offset -= kiov->kiov_len;
-- kiov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
-- iov_ptr = ((char *)kmap (kiov->kiov_page)) +
-- kiov->kiov_offset + offset;
-- iov_nob = kiov->kiov_len - offset;
-- } else {
-- /* skip complete frags */
-- while (offset >= iov->iov_len) {
-- offset -= iov->iov_len;
-- iov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
-- iov_ptr = iov->iov_base + offset;
-- iov_nob = iov->iov_len - offset;
-- }
--
-- for (;;)
-- {
-- frag = mlen;
-- if (frag > page_nob)
-- frag = page_nob;
-- if (frag > iov_nob)
-- frag = iov_nob;
--
-- memcpy (iov_ptr, page_ptr, frag);
--#if KQSW_CHECKSUM
-- payload_csum = kqsw_csum (payload_csum, iov_ptr, frag);
-- csum_nob += frag;
-- csum_frags++;
--#endif
-- mlen -= frag;
-- if (mlen == 0)
-- break;
--
-- page_nob -= frag;
-- if (page_nob != 0)
-- page_ptr += frag;
-- else
-- {
-- page++;
-- LASSERT (page < krx->krx_npages);
-- page_ptr = page_address(krx->krx_kiov[page].kiov_page);
-- page_nob = PAGE_SIZE;
-- }
--
-- iov_nob -= frag;
-- if (iov_nob != 0)
-- iov_ptr += frag;
-- else if (kiov != NULL) {
-- kunmap (kiov->kiov_page);
-- kiov++;
-- niov--;
-- LASSERT (niov > 0);
-- iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
-- iov_nob = kiov->kiov_len;
-- } else {
-- iov++;
-- niov--;
-- LASSERT (niov > 0);
-- iov_ptr = iov->iov_base;
-- iov_nob = iov->iov_len;
-- }
-- }
--
-- if (kiov != NULL)
-- kunmap (kiov->kiov_page);
-- }
--
--#if KQSW_CHECKSUM
-- memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t),
-- sizeof(kqsw_csum_t));
--
-- if (csum_len != rlen)
-- CERROR("Unable to checksum data in user's buffer\n");
-- else if (senders_csum != payload_csum)
-- kqswnal_csum_error (krx, 0);
--
-- if (csum_verbose)
-- CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, "
-- "csum_nob %d\n",
-- hdr_csum, payload_csum, csum_frags, csum_nob);
--#endif
-- lib_finalize(nal, private, libmsg, PTL_OK);
--
-- return (PTL_OK);
--}
--
--static ptl_err_t
- kqswnal_recv(nal_cb_t *nal,
-kqswnal_recv(lib_nal_t *nal,
-- void *private,
-- lib_msg_t *libmsg,
-- unsigned int niov,
-- struct iovec *iov,
-- size_t offset,
-- size_t mlen,
-- size_t rlen)
--{
-- return (kqswnal_recvmsg(nal, private, libmsg,
-- niov, iov, NULL,
-- offset, mlen, rlen));
--}
--
--static ptl_err_t
- kqswnal_recv_pages (nal_cb_t *nal,
-kqswnal_recv_pages (lib_nal_t *nal,
-- void *private,
-- lib_msg_t *libmsg,
-- unsigned int niov,
-- ptl_kiov_t *kiov,
-- size_t offset,
-- size_t mlen,
-- size_t rlen)
--{
-- return (kqswnal_recvmsg(nal, private, libmsg,
-- niov, NULL, kiov,
-- offset, mlen, rlen));
--}
--
--int
--kqswnal_thread_start (int (*fn)(void *arg), void *arg)
--{
-- long pid = kernel_thread (fn, arg, 0);
--
-- if (pid < 0)
-- return ((int)pid);
--
-- atomic_inc (&kqswnal_data.kqn_nthreads);
- atomic_inc (&kqswnal_data.kqn_nthreads_running);
-- return (0);
--}
--
--void
--kqswnal_thread_fini (void)
--{
-- atomic_dec (&kqswnal_data.kqn_nthreads);
--}
--
--int
--kqswnal_scheduler (void *arg)
--{
-- kqswnal_rx_t *krx;
-- kqswnal_tx_t *ktx;
-- kpr_fwd_desc_t *fwd;
-- unsigned long flags;
-- int rc;
-- int counter = 0;
- int shuttingdown = 0;
-- int did_something;
--
-- kportal_daemonize ("kqswnal_sched");
-- kportal_blockallsigs ();
--
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
--
-- for (;;)
-- {
- if (kqswnal_data.kqn_shuttingdown != shuttingdown) {
-
- if (kqswnal_data.kqn_shuttingdown == 2)
- break;
-
- /* During stage 1 of shutdown we are still responsive
- * to receives */
-
- atomic_dec (&kqswnal_data.kqn_nthreads_running);
- shuttingdown = kqswnal_data.kqn_shuttingdown;
- }
-
-- did_something = 0;
--
-- if (!list_empty (&kqswnal_data.kqn_readyrxds))
-- {
-- krx = list_entry(kqswnal_data.kqn_readyrxds.next,
-- kqswnal_rx_t, krx_list);
-- list_del (&krx->krx_list);
-- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-- flags);
--
- kqswnal_rx (krx);
- switch (krx->krx_state) {
- case KRX_PARSE:
- kqswnal_parse (krx);
- break;
- case KRX_COMPLETING:
- /* Drop last ref to reply to RPC and requeue */
- LASSERT (krx->krx_rpc_reply_needed);
- kqswnal_rx_decref (krx);
- break;
- default:
- LBUG();
- }
--
-- did_something = 1;
-- spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-- }
--
- if (!shuttingdown &&
- !list_empty (&kqswnal_data.kqn_delayedtxds))
- if (!list_empty (&kqswnal_data.kqn_delayedtxds))
-- {
-- ktx = list_entry(kqswnal_data.kqn_delayedtxds.next,
-- kqswnal_tx_t, ktx_list);
-- list_del_init (&ktx->ktx_delayed_list);
-- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-- flags);
--
-- rc = kqswnal_launch (ktx);
- if (rc != 0) /* failed: ktx_nid down? */
- {
- if (rc != 0) {
-- CERROR("Failed delayed transmit to "LPX64
-- ": %d\n", ktx->ktx_nid, rc);
-- kqswnal_tx_done (ktx, rc);
-- }
- atomic_dec (&kqswnal_data.kqn_pending_txs);
--
-- did_something = 1;
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-- }
--
- if (!shuttingdown &
- !list_empty (&kqswnal_data.kqn_delayedfwds))
- if (!list_empty (&kqswnal_data.kqn_delayedfwds))
-- {
-- fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list);
-- list_del (&fwd->kprfd_list);
-- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
--
- /* If we're shutting down, this will just requeue fwd on kqn_idletxd_fwdq */
-- kqswnal_fwd_packet (NULL, fwd);
--
-- did_something = 1;
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-- }
--
- /* nothing to do or hogging CPU */
- /* nothing to do or hogging CPU */
-- if (!did_something || counter++ == KQSW_RESCHED) {
-- spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-- flags);
--
-- counter = 0;
--
-- if (!did_something) {
- if (kqswnal_data.kqn_shuttingdown == 2) {
- /* We only exit in stage 2 of shutdown when
- * there's nothing left to do */
- break;
- }
-- rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq,
- kqswnal_data.kqn_shuttingdown != shuttingdown ||
- kqswnal_data.kqn_shuttingdown == 2 ||
-- !list_empty(&kqswnal_data.kqn_readyrxds) ||
-- !list_empty(&kqswnal_data.kqn_delayedtxds) ||
-- !list_empty(&kqswnal_data.kqn_delayedfwds));
-- LASSERT (rc == 0);
-- } else if (need_resched())
-- schedule ();
--
-- spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-- }
-- }
-
- spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
--
-- kqswnal_thread_fini ();
-- return (0);
--}
--
- nal_cb_t kqswnal_lib =
-lib_nal_t kqswnal_lib =
--{
- nal_data: &kqswnal_data, /* NAL private data */
- cb_send: kqswnal_send,
- cb_send_pages: kqswnal_send_pages,
- cb_recv: kqswnal_recv,
- cb_recv_pages: kqswnal_recv_pages,
- cb_read: kqswnal_read,
- cb_write: kqswnal_write,
- cb_malloc: kqswnal_malloc,
- cb_free: kqswnal_free,
- cb_printf: kqswnal_printf,
- cb_cli: kqswnal_cli,
- cb_sti: kqswnal_sti,
- cb_dist: kqswnal_dist
- libnal_data: &kqswnal_data, /* NAL private data */
- libnal_send: kqswnal_send,
- libnal_send_pages: kqswnal_send_pages,
- libnal_recv: kqswnal_recv,
- libnal_recv_pages: kqswnal_recv_pages,
- libnal_dist: kqswnal_dist
--};
+++ /dev/null
--.deps
--Makefile
--autoMakefile.in
--autoMakefile
--*.ko
--*.mod.c
--.*.flags
--.*.cmd
--.tmp_versions
--.depend
+++ /dev/null
--.deps
--Makefile
--.*.cmd
--autoMakefile.in
--autoMakefile
--*.ko
--*.mod.c
--.*.flags
--.tmp_versions
--.depend
+++ /dev/null
--MODULES := ksocknal
--ksocknal-objs := socknal.o socknal_cb.o
--
--# If you don't build with -O2, your modules won't insert, becahse htonl is
--# just special that way.
--EXTRA_POST_CFLAGS := -O2
--
--@INCLUDE_RULES@
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--if MODULES
--if !CRAY_PORTALS
--modulenet_DATA = ksocknal$(KMODEXT)
--endif
--endif
--
--MOSTLYCLEANFILES = *.o *.ko *.mod.c
--DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal.h
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- * Author: Zach Brown <zab@zabbo.net>
-- * Author: Peter J. Braam <braam@clusterfs.com>
-- * Author: Phil Schwan <phil@clusterfs.com>
-- * Author: Eric Barton <eric@bartonsoftware.com>
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include "socknal.h"
--
-nal_t ksocknal_api;
-ksock_nal_data_t ksocknal_data;
--ptl_handle_ni_t ksocknal_ni;
- static nal_t ksocknal_api;
- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
- ksock_nal_data_t ksocknal_data;
- #else
- static ksock_nal_data_t ksocknal_data;
- #endif
-ksock_tunables_t ksocknal_tunables;
--
--kpr_nal_interface_t ksocknal_router_interface = {
-- kprni_nalid: SOCKNAL,
-- kprni_arg: &ksocknal_data,
-- kprni_fwd: ksocknal_fwd_packet,
-- kprni_notify: ksocknal_notify,
--};
--
-#ifdef CONFIG_SYSCTL
--#define SOCKNAL_SYSCTL 200
--
- #define SOCKNAL_SYSCTL_TIMEOUT 1
- #define SOCKNAL_SYSCTL_EAGER_ACK 2
- #define SOCKNAL_SYSCTL_ZERO_COPY 3
- #define SOCKNAL_SYSCTL_TYPED 4
- #define SOCKNAL_SYSCTL_MIN_BULK 5
-#define SOCKNAL_SYSCTL_TIMEOUT 1
-#define SOCKNAL_SYSCTL_EAGER_ACK 2
-#define SOCKNAL_SYSCTL_ZERO_COPY 3
-#define SOCKNAL_SYSCTL_TYPED 4
-#define SOCKNAL_SYSCTL_MIN_BULK 5
-#define SOCKNAL_SYSCTL_BUFFER_SIZE 6
-#define SOCKNAL_SYSCTL_NAGLE 7
-#define SOCKNAL_SYSCTL_IRQ_AFFINITY 8
-#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE 9
-#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10
-#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11
--
--static ctl_table ksocknal_ctl_table[] = {
-- {SOCKNAL_SYSCTL_TIMEOUT, "timeout",
- &ksocknal_data.ksnd_io_timeout, sizeof (int),
- &ksocknal_tunables.ksnd_io_timeout, sizeof (int),
-- 0644, NULL, &proc_dointvec},
-- {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack",
- &ksocknal_data.ksnd_eager_ack, sizeof (int),
- &ksocknal_tunables.ksnd_eager_ack, sizeof (int),
-- 0644, NULL, &proc_dointvec},
--#if SOCKNAL_ZC
-- {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy",
- &ksocknal_data.ksnd_zc_min_frag, sizeof (int),
- &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int),
-- 0644, NULL, &proc_dointvec},
--#endif
-- {SOCKNAL_SYSCTL_TYPED, "typed",
- &ksocknal_data.ksnd_typed_conns, sizeof (int),
- &ksocknal_tunables.ksnd_typed_conns, sizeof (int),
-- 0644, NULL, &proc_dointvec},
-- {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk",
- &ksocknal_data.ksnd_min_bulk, sizeof (int),
- &ksocknal_tunables.ksnd_min_bulk, sizeof (int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size",
- &ksocknal_tunables.ksnd_buffer_size, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_NAGLE, "nagle",
- &ksocknal_tunables.ksnd_nagle, sizeof(int),
- 0644, NULL, &proc_dointvec},
-#if CPU_AFFINITY
- {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity",
- &ksocknal_tunables.ksnd_irq_affinity, sizeof(int),
- 0644, NULL, &proc_dointvec},
-#endif
- {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle",
- &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count",
- &ksocknal_tunables.ksnd_keepalive_count, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl",
- &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int),
-- 0644, NULL, &proc_dointvec},
-- { 0 }
--};
--
--static ctl_table ksocknal_top_ctl_table[] = {
-- {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table},
-- { 0 }
--};
-
- int
- ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
- {
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
-
- lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */
- return PTL_OK;
- }
-
- int
- ksocknal_api_shutdown(nal_t *nal, int ni)
- {
- return PTL_OK;
- }
-
- void
- ksocknal_api_yield(nal_t *nal)
- {
- our_cond_resched();
- return;
- }
-
- void
- ksocknal_api_lock(nal_t *nal, unsigned long *flags)
- {
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_cli(nal_cb,flags);
- }
-
- void
- ksocknal_api_unlock(nal_t *nal, unsigned long *flags)
- {
- ksock_nal_data_t *k;
- nal_cb_t *nal_cb;
-
- k = nal->nal_data;
- nal_cb = k->ksnd_nal_cb;
- nal_cb->cb_sti(nal_cb,flags);
- }
-
- nal_t *
- ksocknal_init(int interface, ptl_pt_index_t ptl_size,
- ptl_ac_index_t ac_size, ptl_pid_t requested_pid)
- {
- CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0);
- lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size);
- return (&ksocknal_api);
- }
-
- /*
- * EXTRA functions follow
- */
-#endif
--
--int
--ksocknal_set_mynid(ptl_nid_t nid)
--{
- lib_ni_t *ni = &ksocknal_lib.ni;
- lib_ni_t *ni = &ksocknal_lib.libnal_ni;
--
-- /* FIXME: we have to do this because we call lib_init() at module
-- * insertion time, which is before we have 'mynid' available. lib_init
-- * sets the NAL's nid, which it uses to tell other nodes where packets
-- * are coming from. This is not a very graceful solution to this
-- * problem. */
--
-- CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
- nid, ni->nid);
- nid, ni->ni_pid.nid);
--
- ni->nid = nid;
- ni->ni_pid.nid = nid;
-- return (0);
--}
--
--void
--ksocknal_bind_irq (unsigned int irq)
--{
--#if (defined(CONFIG_SMP) && CPU_AFFINITY)
-- int bind;
-- int cpu;
-- unsigned long flags;
-- char cmdline[64];
-- ksock_irqinfo_t *info;
-- char *argv[] = {"/bin/sh",
-- "-c",
-- cmdline,
-- NULL};
-- char *envp[] = {"HOME=/",
-- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-- NULL};
--
-- LASSERT (irq < NR_IRQS);
-- if (irq == 0) /* software NIC or affinity disabled */
-- return;
--
-- info = &ksocknal_data.ksnd_irqinfo[irq];
--
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- LASSERT (info->ksni_valid);
-- bind = !info->ksni_bound;
-- info->ksni_bound = 1;
--
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- if (!bind) /* bound already */
-- return;
--
-- cpu = ksocknal_irqsched2cpu(info->ksni_sched);
-- snprintf (cmdline, sizeof (cmdline),
-- "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
--
-- printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n",
-- irq, cpu, cmdline);
--
-- /* FIXME: Find a better method of setting IRQ affinity...
-- */
--
-- USERMODEHELPER(argv[0], argv, envp);
--#endif
-}
-
-ksock_interface_t *
-ksocknal_ip2iface(__u32 ip)
-{
- int i;
- ksock_interface_t *iface;
-
- for (i = 0; i < ksocknal_data.ksnd_ninterfaces; i++) {
- LASSERT(i < SOCKNAL_MAX_INTERFACES);
- iface = &ksocknal_data.ksnd_interfaces[i];
-
- if (iface->ksni_ipaddr == ip)
- return (iface);
- }
-
- return (NULL);
--}
--
--ksock_route_t *
- ksocknal_create_route (__u32 ipaddr, int port, int buffer_size,
- int irq_affinity, int eager)
-ksocknal_create_route (__u32 ipaddr, int port)
--{
-- ksock_route_t *route;
--
-- PORTAL_ALLOC (route, sizeof (*route));
-- if (route == NULL)
-- return (NULL);
--
-- atomic_set (&route->ksnr_refcount, 1);
- route->ksnr_sharecount = 0;
-- route->ksnr_peer = NULL;
-- route->ksnr_timeout = jiffies;
-- route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
-- route->ksnr_ipaddr = ipaddr;
-- route->ksnr_port = port;
- route->ksnr_buffer_size = buffer_size;
- route->ksnr_irq_affinity = irq_affinity;
- route->ksnr_eager = eager;
-- route->ksnr_connecting = 0;
-- route->ksnr_connected = 0;
-- route->ksnr_deleted = 0;
-- route->ksnr_conn_count = 0;
- route->ksnr_share_count = 0;
--
-- return (route);
--}
--
--void
--ksocknal_destroy_route (ksock_route_t *route)
--{
- LASSERT (route->ksnr_sharecount == 0);
-
-- if (route->ksnr_peer != NULL)
-- ksocknal_put_peer (route->ksnr_peer);
--
-- PORTAL_FREE (route, sizeof (*route));
--}
--
--void
--ksocknal_put_route (ksock_route_t *route)
--{
-- CDEBUG (D_OTHER, "putting route[%p] (%d)\n",
-- route, atomic_read (&route->ksnr_refcount));
--
-- LASSERT (atomic_read (&route->ksnr_refcount) > 0);
-- if (!atomic_dec_and_test (&route->ksnr_refcount))
-- return;
--
-- ksocknal_destroy_route (route);
--}
--
--ksock_peer_t *
--ksocknal_create_peer (ptl_nid_t nid)
--{
-- ksock_peer_t *peer;
--
-- LASSERT (nid != PTL_NID_ANY);
--
-- PORTAL_ALLOC (peer, sizeof (*peer));
-- if (peer == NULL)
-- return (NULL);
--
- memset (peer, 0, sizeof (*peer));
- memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
--
-- peer->ksnp_nid = nid;
-- atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
-- peer->ksnp_closing = 0;
-- INIT_LIST_HEAD (&peer->ksnp_conns);
-- INIT_LIST_HEAD (&peer->ksnp_routes);
-- INIT_LIST_HEAD (&peer->ksnp_tx_queue);
--
-- atomic_inc (&ksocknal_data.ksnd_npeers);
-- return (peer);
--}
--
--void
--ksocknal_destroy_peer (ksock_peer_t *peer)
--{
-- CDEBUG (D_NET, "peer "LPX64" %p deleted\n", peer->ksnp_nid, peer);
--
-- LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
-- LASSERT (list_empty (&peer->ksnp_conns));
-- LASSERT (list_empty (&peer->ksnp_routes));
-- LASSERT (list_empty (&peer->ksnp_tx_queue));
--
-- PORTAL_FREE (peer, sizeof (*peer));
--
-- /* NB a peer's connections and autoconnect routes keep a reference
-- * on their peer until they are destroyed, so we can be assured
-- * that _all_ state to do with this peer has been cleaned up when
-- * its refcount drops to zero. */
-- atomic_dec (&ksocknal_data.ksnd_npeers);
--}
--
--void
--ksocknal_put_peer (ksock_peer_t *peer)
--{
-- CDEBUG (D_OTHER, "putting peer[%p] -> "LPX64" (%d)\n",
-- peer, peer->ksnp_nid,
-- atomic_read (&peer->ksnp_refcount));
--
-- LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
-- if (!atomic_dec_and_test (&peer->ksnp_refcount))
-- return;
--
-- ksocknal_destroy_peer (peer);
--}
--
--ksock_peer_t *
--ksocknal_find_peer_locked (ptl_nid_t nid)
--{
-- struct list_head *peer_list = ksocknal_nid2peerlist (nid);
-- struct list_head *tmp;
-- ksock_peer_t *peer;
--
-- list_for_each (tmp, peer_list) {
--
-- peer = list_entry (tmp, ksock_peer_t, ksnp_list);
--
-- LASSERT (!peer->ksnp_closing);
- LASSERT (!(list_empty (&peer->ksnp_routes) &&
- list_empty (&peer->ksnp_conns)));
--
-- if (peer->ksnp_nid != nid)
-- continue;
--
-- CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-- peer, nid, atomic_read (&peer->ksnp_refcount));
-- return (peer);
-- }
-- return (NULL);
--}
--
--ksock_peer_t *
--ksocknal_get_peer (ptl_nid_t nid)
--{
-- ksock_peer_t *peer;
--
-- read_lock (&ksocknal_data.ksnd_global_lock);
-- peer = ksocknal_find_peer_locked (nid);
-- if (peer != NULL) /* +1 ref for caller? */
-- atomic_inc (&peer->ksnp_refcount);
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--
-- return (peer);
--}
--
--void
--ksocknal_unlink_peer_locked (ksock_peer_t *peer)
--{
- int i;
- __u32 ip;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
- LASSERT (i < SOCKNAL_MAX_INTERFACES);
- ip = peer->ksnp_passive_ips[i];
-
- ksocknal_ip2iface(ip)->ksni_npeers--;
- }
-
- LASSERT (list_empty(&peer->ksnp_conns));
- LASSERT (list_empty(&peer->ksnp_routes));
-- LASSERT (!peer->ksnp_closing);
-- peer->ksnp_closing = 1;
-- list_del (&peer->ksnp_list);
-- /* lose peerlist's ref */
-- ksocknal_put_peer (peer);
--}
--
- ksock_route_t *
- ksocknal_get_route_by_idx (int index)
-int
-ksocknal_get_peer_info (int index, ptl_nid_t *nid,
- __u32 *myip, __u32 *peer_ip, int *port,
- int *conn_count, int *share_count)
--{
-- ksock_peer_t *peer;
-- struct list_head *ptmp;
-- ksock_route_t *route;
-- struct list_head *rtmp;
-- int i;
- int j;
- int rc = -ENOENT;
--
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-
-- list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
-- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
--
- LASSERT (!(list_empty (&peer->ksnp_routes) &&
- list_empty (&peer->ksnp_conns)));
- if (peer->ksnp_n_passive_ips == 0 &&
- list_empty(&peer->ksnp_routes)) {
- if (index-- > 0)
- continue;
-
- *nid = peer->ksnp_nid;
- *myip = 0;
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
--
- for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
- if (index-- > 0)
- continue;
-
- *nid = peer->ksnp_nid;
- *myip = peer->ksnp_passive_ips[j];
- *peer_ip = 0;
- *port = 0;
- *conn_count = 0;
- *share_count = 0;
- rc = 0;
- goto out;
- }
-
-- list_for_each (rtmp, &peer->ksnp_routes) {
-- if (index-- > 0)
-- continue;
--
- route = list_entry (rtmp, ksock_route_t, ksnr_list);
- atomic_inc (&route->ksnr_refcount);
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (route);
- route = list_entry(rtmp, ksock_route_t,
- ksnr_list);
-
- *nid = peer->ksnp_nid;
- *myip = route->ksnr_myipaddr;
- *peer_ip = route->ksnr_ipaddr;
- *port = route->ksnr_port;
- *conn_count = route->ksnr_conn_count;
- *share_count = route->ksnr_share_count;
- rc = 0;
- goto out;
-- }
-- }
-- }
-
- out:
-- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (NULL);
- return (rc);
-}
-
-void
-ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
-{
- ksock_peer_t *peer = route->ksnr_peer;
- int type = conn->ksnc_type;
- ksock_interface_t *iface;
-
- conn->ksnc_route = route;
- atomic_inc (&route->ksnr_refcount);
-
- if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
- if (route->ksnr_myipaddr == 0) {
- /* route wasn't bound locally yet (the initial route) */
- CWARN("Binding "LPX64" %u.%u.%u.%u to %u.%u.%u.%u\n",
- peer->ksnp_nid,
- HIPQUAD(route->ksnr_ipaddr),
- HIPQUAD(conn->ksnc_myipaddr));
- } else {
- CWARN("Rebinding "LPX64" %u.%u.%u.%u from "
- "%u.%u.%u.%u to %u.%u.%u.%u\n",
- peer->ksnp_nid,
- HIPQUAD(route->ksnr_ipaddr),
- HIPQUAD(route->ksnr_myipaddr),
- HIPQUAD(conn->ksnc_myipaddr));
-
- iface = ksocknal_ip2iface(route->ksnr_myipaddr);
- if (iface != NULL)
- iface->ksni_nroutes--;
- }
- route->ksnr_myipaddr = conn->ksnc_myipaddr;
- iface = ksocknal_ip2iface(route->ksnr_myipaddr);
- if (iface != NULL)
- iface->ksni_nroutes++;
- }
-
- route->ksnr_connected |= (1<<type);
- route->ksnr_connecting &= ~(1<<type);
- route->ksnr_conn_count++;
-
- /* Successful connection => further attempts can
- * proceed immediately */
- route->ksnr_timeout = jiffies;
- route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
-}
-
-void
-ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
-{
- struct list_head *tmp;
- ksock_conn_t *conn;
- int type;
- ksock_route_t *route2;
-
- LASSERT (route->ksnr_peer == NULL);
- LASSERT (route->ksnr_connecting == 0);
- LASSERT (route->ksnr_connected == 0);
-
- /* LASSERT(unique) */
- list_for_each(tmp, &peer->ksnp_routes) {
- route2 = list_entry(tmp, ksock_route_t, ksnr_list);
-
- if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
- CERROR ("Duplicate route "LPX64" %u.%u.%u.%u\n",
- peer->ksnp_nid, HIPQUAD(route->ksnr_ipaddr));
- LBUG();
- }
- }
-
- route->ksnr_peer = peer;
- atomic_inc (&peer->ksnp_refcount);
- /* peer's routelist takes over my ref on 'route' */
- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
- list_for_each(tmp, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
- type = conn->ksnc_type;
-
- if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- /* keep going (typed routes) */
- }
-}
-
-void
-ksocknal_del_route_locked (ksock_route_t *route)
-{
- ksock_peer_t *peer = route->ksnr_peer;
- ksock_interface_t *iface;
- ksock_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
-
- LASSERT (!route->ksnr_deleted);
-
- /* Close associated conns */
- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_route != route)
- continue;
-
- ksocknal_close_conn_locked (conn, 0);
- }
-
- if (route->ksnr_myipaddr != 0) {
- iface = ksocknal_ip2iface(route->ksnr_myipaddr);
- if (iface != NULL)
- iface->ksni_nroutes--;
- }
-
- route->ksnr_deleted = 1;
- list_del (&route->ksnr_list);
- ksocknal_put_route (route); /* drop peer's ref */
-
- if (list_empty (&peer->ksnp_routes) &&
- list_empty (&peer->ksnp_conns)) {
- /* I've just removed the last autoconnect route of a peer
- * with no active connections */
- ksocknal_unlink_peer_locked (peer);
- }
--}
--
--int
- ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob,
- int bind_irq, int share, int eager)
-ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port)
--{
-- unsigned long flags;
- struct list_head *tmp;
-- ksock_peer_t *peer;
-- ksock_peer_t *peer2;
-- ksock_route_t *route;
- struct list_head *rtmp;
-- ksock_route_t *route2;
--
-- if (nid == PTL_NID_ANY)
-- return (-EINVAL);
--
-- /* Have a brand new peer ready... */
-- peer = ksocknal_create_peer (nid);
-- if (peer == NULL)
-- return (-ENOMEM);
--
- route = ksocknal_create_route (ipaddr, port, bufnob,
- bind_irq, eager);
- route = ksocknal_create_route (ipaddr, port);
-- if (route == NULL) {
-- ksocknal_put_peer (peer);
-- return (-ENOMEM);
-- }
--
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- peer2 = ksocknal_find_peer_locked (nid);
-- if (peer2 != NULL) {
-- ksocknal_put_peer (peer);
-- peer = peer2;
-- } else {
- /* peer table takes existing ref on peer */
- list_add (&peer->ksnp_list,
- ksocknal_nid2peerlist (nid));
- /* peer table takes my ref on peer */
- list_add_tail (&peer->ksnp_list,
- ksocknal_nid2peerlist (nid));
-- }
--
-- route2 = NULL;
- if (share) {
- /* check for existing route to this NID via this ipaddr */
- list_for_each (rtmp, &peer->ksnp_routes) {
- route2 = list_entry (rtmp, ksock_route_t, ksnr_list);
-
- if (route2->ksnr_ipaddr == ipaddr)
- break;
-
- route2 = NULL;
- }
- list_for_each (tmp, &peer->ksnp_routes) {
- route2 = list_entry(tmp, ksock_route_t, ksnr_list);
-
- if (route2->ksnr_ipaddr == ipaddr)
- break;
-
- route2 = NULL;
-- }
-
- if (route2 != NULL) {
- ksocknal_put_route (route);
- route = route2;
- if (route2 == NULL) {
- ksocknal_add_route_locked(peer, route);
- route->ksnr_share_count++;
-- } else {
- /* route takes a ref on peer */
- route->ksnr_peer = peer;
- atomic_inc (&peer->ksnp_refcount);
- /* peer's route list takes existing ref on route */
- list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
- ksocknal_put_route(route);
- route2->ksnr_share_count++;
-- }
-
- route->ksnr_sharecount++;
--
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- return (0);
--}
--
--void
- ksocknal_del_route_locked (ksock_route_t *route, int share, int keep_conn)
-ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share)
--{
- ksock_peer_t *peer = route->ksnr_peer;
-- ksock_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- ksock_route_t *route;
- struct list_head *tmp;
- struct list_head *nxt;
- int nshared;
--
- if (!share)
- route->ksnr_sharecount = 0;
- else {
- route->ksnr_sharecount--;
- if (route->ksnr_sharecount != 0)
- return;
- }
- LASSERT (!peer->ksnp_closing);
--
- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
- list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
--
- if (conn->ksnc_route != route)
- if (single_share && route->ksnr_share_count == 0)
-- continue;
-
- if (!keep_conn) {
- ksocknal_close_conn_locked (conn, 0);
-
- /* no match */
- if (!(ip == 0 || route->ksnr_ipaddr == ip))
-- continue;
-
- if (!single_share)
- route->ksnr_share_count = 0;
- else if (route->ksnr_share_count > 0)
- route->ksnr_share_count--;
-
- if (route->ksnr_share_count == 0) {
- /* This deletes associated conns too */
- ksocknal_del_route_locked (route);
-- }
--
- /* keeping the conn; just dissociate it and route... */
- conn->ksnc_route = NULL;
- ksocknal_put_route (route); /* drop conn's ref on route */
- if (single_share)
- break;
-- }
-
- route->ksnr_deleted = 1;
- list_del (&route->ksnr_list);
- ksocknal_put_route (route); /* drop peer's ref */
--
- if (list_empty (&peer->ksnp_routes) &&
- list_empty (&peer->ksnp_conns)) {
- /* I've just removed the last autoconnect route of a peer
- * with no active connections */
- ksocknal_unlink_peer_locked (peer);
- nshared = 0;
- list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
- nshared += route->ksnr_share_count;
- }
-
- if (nshared == 0) {
- /* remove everything else if there are no explicit entries
- * left */
-
- list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
-
- /* we should only be removing auto-entries */
- LASSERT(route->ksnr_share_count == 0);
- ksocknal_del_route_locked (route);
- }
-
- list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- ksocknal_close_conn_locked(conn, 0);
- }
-- }
-
- /* NB peer unlinks itself when last conn/route is removed */
--}
--
--int
- ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, int share, int keep_conn)
-ksocknal_del_peer (ptl_nid_t nid, __u32 ip, int single_share)
--{
-- unsigned long flags;
-- struct list_head *ptmp;
-- struct list_head *pnxt;
-- ksock_peer_t *peer;
- struct list_head *rtmp;
- struct list_head *rnxt;
- ksock_route_t *route;
-- int lo;
-- int hi;
-- int i;
-- int rc = -ENOENT;
--
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- if (nid != PTL_NID_ANY)
-- lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
-- else {
-- lo = 0;
-- hi = ksocknal_data.ksnd_peer_hash_size - 1;
-- }
--
-- for (i = lo; i <= hi; i++) {
-- list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
-- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
--
-- if (!(nid == PTL_NID_ANY || peer->ksnp_nid == nid))
-- continue;
-
- list_for_each_safe (rtmp, rnxt, &peer->ksnp_routes) {
- route = list_entry (rtmp, ksock_route_t,
- ksnr_list);
--
- if (!(ipaddr == 0 ||
- route->ksnr_ipaddr == ipaddr))
- continue;
- ksocknal_del_peer_locked (peer, ip, single_share);
- rc = 0; /* matched! */
--
- ksocknal_del_route_locked (route, share, keep_conn);
- rc = 0; /* matched something */
- if (share)
- goto out;
- }
- if (single_share)
- break;
-- }
-- }
- out:
-
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- return (rc);
--}
--
--ksock_conn_t *
--ksocknal_get_conn_by_idx (int index)
--{
-- ksock_peer_t *peer;
-- struct list_head *ptmp;
-- ksock_conn_t *conn;
-- struct list_head *ctmp;
-- int i;
--
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-- list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
-- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
--
- LASSERT (!(list_empty (&peer->ksnp_routes) &&
- list_empty (&peer->ksnp_conns)));
- LASSERT (!peer->ksnp_closing);
--
-- list_for_each (ctmp, &peer->ksnp_conns) {
-- if (index-- > 0)
-- continue;
--
-- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
-- atomic_inc (&conn->ksnc_refcount);
-- read_unlock (&ksocknal_data.ksnd_global_lock);
-- return (conn);
-- }
-- }
-- }
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
-- return (NULL);
--}
--
- void
- ksocknal_get_peer_addr (ksock_conn_t *conn)
-int
-ksocknal_get_conn_addrs (ksock_conn_t *conn)
--{
-- struct sockaddr_in sin;
-- int len = sizeof (sin);
-- int rc;
--
-- rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock,
-- (struct sockaddr *)&sin, &len, 2);
-- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
-- LASSERT (!conn->ksnc_closing);
- LASSERT (len <= sizeof (sin));
--
-- if (rc != 0) {
-- CERROR ("Error %d getting sock peer IP\n", rc);
- return;
- return rc;
-- }
--
-- conn->ksnc_ipaddr = ntohl (sin.sin_addr.s_addr);
-- conn->ksnc_port = ntohs (sin.sin_port);
-
- rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock,
- (struct sockaddr *)&sin, &len, 0);
- if (rc != 0) {
- CERROR ("Error %d getting sock local IP\n", rc);
- return rc;
- }
-
- conn->ksnc_myipaddr = ntohl (sin.sin_addr.s_addr);
-
- return 0;
--}
--
--unsigned int
- ksocknal_conn_irq (ksock_conn_t *conn)
-ksocknal_sock_irq (struct socket *sock)
--{
-- int irq = 0;
-- struct dst_entry *dst;
--
- dst = sk_dst_get (conn->ksnc_sock->sk);
- if (!ksocknal_tunables.ksnd_irq_affinity)
- return 0;
-
- dst = sk_dst_get (sock->sk);
-- if (dst != NULL) {
-- if (dst->dev != NULL) {
-- irq = dst->dev->irq;
-- if (irq >= NR_IRQS) {
-- CERROR ("Unexpected IRQ %x\n", irq);
-- irq = 0;
-- }
-- }
-- dst_release (dst);
-- }
--
- /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
- LASSERT (!conn->ksnc_closing);
-- return (irq);
--}
--
--ksock_sched_t *
--ksocknal_choose_scheduler_locked (unsigned int irq)
--{
-- ksock_sched_t *sched;
-- ksock_irqinfo_t *info;
-- int i;
--
-- LASSERT (irq < NR_IRQS);
-- info = &ksocknal_data.ksnd_irqinfo[irq];
--
-- if (irq != 0 && /* hardware NIC */
-- info->ksni_valid) { /* already set up */
-- return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
-- }
--
-- /* software NIC (irq == 0) || not associated with a scheduler yet.
-- * Choose the CPU with the fewest connections... */
-- sched = &ksocknal_data.ksnd_schedulers[0];
- for (i = 1; i < SOCKNAL_N_SCHED; i++)
- for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++)
-- if (sched->kss_nconns >
-- ksocknal_data.ksnd_schedulers[i].kss_nconns)
-- sched = &ksocknal_data.ksnd_schedulers[i];
--
-- if (irq != 0) { /* Hardware NIC */
-- info->ksni_valid = 1;
-- info->ksni_sched = sched - ksocknal_data.ksnd_schedulers;
--
-- /* no overflow... */
-- LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers);
-- }
--
-- return (sched);
--}
--
--int
- ksocknal_create_conn (ksock_route_t *route, struct socket *sock,
- int bind_irq, int type)
-ksocknal_local_ipvec (__u32 *ipaddrs)
-{
- int i;
- int nip;
-
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- nip = ksocknal_data.ksnd_ninterfaces;
- for (i = 0; i < nip; i++) {
- LASSERT (i < SOCKNAL_MAX_INTERFACES);
-
- ipaddrs[i] = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr;
- LASSERT (ipaddrs[i] != 0);
- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
- return (nip);
-}
-
-int
-ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
-{
- int best_netmatch = 0;
- int best_xor = 0;
- int best = -1;
- int this_xor;
- int this_netmatch;
- int i;
-
- for (i = 0; i < nips; i++) {
- if (ips[i] == 0)
- continue;
-
- this_xor = (ips[i] ^ iface->ksni_ipaddr);
- this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
- if (!(best < 0 ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_xor > this_xor)))
- continue;
-
- best = i;
- best_netmatch = this_netmatch;
- best_xor = this_xor;
- }
-
- LASSERT (best >= 0);
- return (best);
-}
-
-int
-ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
-{
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- unsigned long flags;
- ksock_interface_t *iface;
- ksock_interface_t *best_iface;
- int n_ips;
- int i;
- int j;
- int k;
- __u32 ip;
- __u32 xor;
- int this_netmatch;
- int best_netmatch;
- int best_npeers;
-
- /* CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness shouldn't matter */
-
- /* Also note that I'm not going to return more than n_peerips
- * interfaces, even if I have more myself */
-
- write_lock_irqsave(global_lock, flags);
-
- LASSERT (n_peerips <= SOCKNAL_MAX_INTERFACES);
- LASSERT (ksocknal_data.ksnd_ninterfaces <= SOCKNAL_MAX_INTERFACES);
-
- n_ips = MIN(n_peerips, ksocknal_data.ksnd_ninterfaces);
-
- for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
- /* ^ yes really... */
-
- /* If we have any new interfaces, first tick off all the
- * peer IPs that match old interfaces, then choose new
- * interfaces to match the remaining peer IPS.
- * We don't forget interfaces we've stopped using; we might
- * start using them again... */
-
- if (i < peer->ksnp_n_passive_ips) {
- /* Old interface. */
- ip = peer->ksnp_passive_ips[i];
- best_iface = ksocknal_ip2iface(ip);
-
- /* peer passive ips are kept up to date */
- LASSERT(best_iface != NULL);
- } else {
- /* choose a new interface */
- LASSERT (i == peer->ksnp_n_passive_ips);
-
- best_iface = NULL;
- best_netmatch = 0;
- best_npeers = 0;
-
- for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) {
- iface = &ksocknal_data.ksnd_interfaces[j];
- ip = iface->ksni_ipaddr;
-
- for (k = 0; k < peer->ksnp_n_passive_ips; k++)
- if (peer->ksnp_passive_ips[k] == ip)
- break;
-
- if (k < peer->ksnp_n_passive_ips) /* using it already */
- continue;
-
- k = ksocknal_match_peerip(iface, peerips, n_peerips);
- xor = (ip ^ peerips[k]);
- this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
- if (!(best_iface == NULL ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_npeers > iface->ksni_npeers)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_npeers = iface->ksni_npeers;
- }
-
- best_iface->ksni_npeers++;
- ip = best_iface->ksni_ipaddr;
- peer->ksnp_passive_ips[i] = ip;
- peer->ksnp_n_passive_ips = i+1;
- }
-
- LASSERT (best_iface != NULL);
-
- /* mark the best matching peer IP used */
- j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
- peerips[j] = 0;
- }
-
- /* Overwrite input peer IP addresses */
- memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
- write_unlock_irqrestore(global_lock, flags);
-
- return (n_ips);
-}
-
-void
-ksocknal_create_routes(ksock_peer_t *peer, int port,
- __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{
- ksock_route_t *newroute = NULL;
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- unsigned long flags;
- struct list_head *rtmp;
- ksock_route_t *route;
- ksock_interface_t *iface;
- ksock_interface_t *best_iface;
- int best_netmatch;
- int this_netmatch;
- int best_nroutes;
- int i;
- int j;
-
- /* CAVEAT EMPTOR: We do all our interface matching with an
- * exclusive hold of global lock at IRQ priority. We're only
- * expecting to be dealing with small numbers of interfaces, so the
- * O(n**3)-ness here shouldn't matter */
-
- write_lock_irqsave(global_lock, flags);
-
- LASSERT (npeer_ipaddrs <= SOCKNAL_MAX_INTERFACES);
-
- for (i = 0; i < npeer_ipaddrs; i++) {
- if (newroute != NULL) {
- newroute->ksnr_ipaddr = peer_ipaddrs[i];
- } else {
- write_unlock_irqrestore(global_lock, flags);
-
- newroute = ksocknal_create_route(peer_ipaddrs[i], port);
- if (newroute == NULL)
- return;
-
- write_lock_irqsave(global_lock, flags);
- }
-
- /* Already got a route? */
- route = NULL;
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
- break;
-
- route = NULL;
- }
- if (route != NULL)
- continue;
-
- best_iface = NULL;
- best_nroutes = 0;
- best_netmatch = 0;
-
- LASSERT (ksocknal_data.ksnd_ninterfaces <= SOCKNAL_MAX_INTERFACES);
-
- /* Select interface to connect from */
- for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) {
- iface = &ksocknal_data.ksnd_interfaces[j];
-
- /* Using this interface already? */
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_myipaddr == iface->ksni_ipaddr)
- break;
-
- route = NULL;
- }
- if (route != NULL)
- continue;
-
- this_netmatch = (((iface->ksni_ipaddr ^
- newroute->ksnr_ipaddr) &
- iface->ksni_netmask) == 0) ? 1 : 0;
-
- if (!(best_iface == NULL ||
- best_netmatch < this_netmatch ||
- (best_netmatch == this_netmatch &&
- best_nroutes > iface->ksni_nroutes)))
- continue;
-
- best_iface = iface;
- best_netmatch = this_netmatch;
- best_nroutes = iface->ksni_nroutes;
- }
-
- if (best_iface == NULL)
- continue;
-
- newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
- best_iface->ksni_nroutes++;
-
- ksocknal_add_route_locked(peer, newroute);
- newroute = NULL;
- }
-
- write_unlock_irqrestore(global_lock, flags);
- if (newroute != NULL)
- ksocknal_put_route(newroute);
-}
-
-int
-ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
--{
- int passive = (type == SOCKNAL_CONN_NONE);
- rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
- __u32 ipaddrs[SOCKNAL_MAX_INTERFACES];
- int nipaddrs;
-- ptl_nid_t nid;
- struct list_head *tmp;
-- __u64 incarnation;
-- unsigned long flags;
-- ksock_conn_t *conn;
- ksock_peer_t *peer;
- ksock_conn_t *conn2;
- ksock_peer_t *peer = NULL;
-- ksock_peer_t *peer2;
-- ksock_sched_t *sched;
-- unsigned int irq;
-- ksock_tx_t *tx;
-- int rc;
--
-- /* NB, sock has an associated file since (a) this connection might
-- * have been created in userland and (b) we need to refcount the
-- * socket so that we don't close it while I/O is being done on
-- * it, and sock->file has that pre-cooked... */
-- LASSERT (sock->file != NULL);
-- LASSERT (file_count(sock->file) > 0);
- LASSERT (route == NULL || !passive);
--
-- rc = ksocknal_setup_sock (sock);
-- if (rc != 0)
-- return (rc);
-
- if (route == NULL) {
- /* acceptor or explicit connect */
- nid = PTL_NID_ANY;
- } else {
- LASSERT (type != SOCKNAL_CONN_NONE);
- /* autoconnect: expect this nid on exchange */
- nid = route->ksnr_peer->ksnp_nid;
- }
--
- rc = ksocknal_hello (sock, &nid, &type, &incarnation);
- if (rc != 0)
- return (rc);
-
- peer = NULL;
- if (route == NULL) { /* not autoconnect */
- /* Assume this socket connects to a brand new peer */
- peer = ksocknal_create_peer (nid);
- if (peer == NULL)
- return (-ENOMEM);
- }
- irq = ksocknal_sock_irq (sock);
--
-- PORTAL_ALLOC(conn, sizeof(*conn));
- if (conn == NULL) {
- if (peer != NULL)
- ksocknal_put_peer (peer);
- if (conn == NULL)
-- return (-ENOMEM);
- }
--
-- memset (conn, 0, sizeof (*conn));
-- conn->ksnc_peer = NULL;
-- conn->ksnc_route = NULL;
-- conn->ksnc_sock = sock;
-- conn->ksnc_type = type;
- conn->ksnc_incarnation = incarnation;
-- conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
-- conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-- atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */
--
-- conn->ksnc_rx_ready = 0;
-- conn->ksnc_rx_scheduled = 0;
-- ksocknal_new_packet (conn, 0);
--
-- INIT_LIST_HEAD (&conn->ksnc_tx_queue);
-- conn->ksnc_tx_ready = 0;
-- conn->ksnc_tx_scheduled = 0;
-- atomic_set (&conn->ksnc_tx_nob, 0);
--
- ksocknal_get_peer_addr (conn);
- /* stash conn's local and remote addrs */
- rc = ksocknal_get_conn_addrs (conn);
- if (rc != 0)
- goto failed_0;
--
- irq = ksocknal_conn_irq (conn);
- if (!passive) {
- /* Active connection sends HELLO eagerly */
- rc = ksocknal_local_ipvec(ipaddrs);
- if (rc < 0)
- goto failed_0;
- nipaddrs = rc;
--
- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
- rc = ksocknal_send_hello (conn, ipaddrs, nipaddrs);
- if (rc != 0)
- goto failed_0;
- }
-
- /* Find out/confirm peer's NID and connection type and get the
- * vector of interfaces she's willing to let me connect to */
- nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid;
- rc = ksocknal_recv_hello (conn, &nid, &incarnation, ipaddrs);
- if (rc < 0)
- goto failed_0;
- nipaddrs = rc;
- LASSERT (nid != PTL_NID_ANY);
--
-- if (route != NULL) {
- /* Autoconnected! */
- LASSERT ((route->ksnr_connected & (1 << type)) == 0);
- LASSERT ((route->ksnr_connecting & (1 << type)) != 0);
- peer = route->ksnr_peer;
- atomic_inc(&peer->ksnp_refcount);
- } else {
- peer = ksocknal_create_peer(nid);
- if (peer == NULL) {
- rc = -ENOMEM;
- goto failed_0;
- }
--
- if (route->ksnr_deleted) {
- /* This conn was autoconnected, but the autoconnect
- * route got deleted while it was being
- * established! */
- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock,
- flags);
- PORTAL_FREE (conn, sizeof (*conn));
- return (-ESTALE);
- write_lock_irqsave(global_lock, flags);
-
- peer2 = ksocknal_find_peer_locked(nid);
- if (peer2 == NULL) {
- /* NB this puts an "empty" peer in the peer
- * table (which takes my ref) */
- list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(nid));
- } else {
- ksocknal_put_peer(peer);
- peer = peer2;
-- }
- /* +1 ref for me */
- atomic_inc(&peer->ksnp_refcount);
--
- write_unlock_irqrestore(global_lock, flags);
- }
-
- if (!passive) {
- ksocknal_create_routes(peer, conn->ksnc_port,
- ipaddrs, nipaddrs);
- rc = 0;
- } else {
- rc = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
- LASSERT (rc >= 0);
- rc = ksocknal_send_hello (conn, ipaddrs, rc);
- }
- if (rc < 0)
- goto failed_1;
-
- write_lock_irqsave (global_lock, flags);
--
- /* associate conn/route */
- conn->ksnc_route = route;
- atomic_inc (&route->ksnr_refcount);
- if (peer->ksnp_closing ||
- (route != NULL && route->ksnr_deleted)) {
- /* route/peer got closed under me */
- rc = -ESTALE;
- goto failed_2;
- }
--
- route->ksnr_connecting &= ~(1 << type);
- route->ksnr_connected |= (1 << type);
- route->ksnr_conn_count++;
- route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
- /* Refuse to duplicate an existing connection (both sides might
- * autoconnect at once), unless this is a loopback connection */
- if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
--
- peer = route->ksnr_peer;
- } else {
- /* Not an autoconnected connection; see if there is an
- * existing peer for this NID */
- peer2 = ksocknal_find_peer_locked (nid);
- if (peer2 != NULL) {
- ksocknal_put_peer (peer);
- peer = peer2;
- } else {
- list_add (&peer->ksnp_list,
- ksocknal_nid2peerlist (nid));
- /* peer list takes over existing ref */
- if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
- conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
- conn2->ksnc_type != conn->ksnc_type ||
- conn2->ksnc_incarnation != incarnation)
- continue;
-
- CWARN("Not creating duplicate connection to "
- "%u.%u.%u.%u type %d\n",
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type);
- rc = -EALREADY;
- goto failed_2;
-- }
-- }
--
- /* Give conn a ref on sock->file since we're going to return success */
- get_file(sock->file);
- /* If the connection created by this route didn't bind to the IP
- * address the route connected to, the connection/route matching
- * code below probably isn't going to work. */
- if (route != NULL &&
- route->ksnr_ipaddr != conn->ksnc_ipaddr) {
- CERROR("Route "LPX64" %u.%u.%u.%u connected to %u.%u.%u.%u\n",
- peer->ksnp_nid,
- HIPQUAD(route->ksnr_ipaddr),
- HIPQUAD(conn->ksnc_ipaddr));
- }
--
- LASSERT (!peer->ksnp_closing);
- /* Search for a route corresponding to the new connection and
- * create an association. This allows incoming connections created
- * by routes in my peer to match my own route entries so I don't
- * continually create duplicate routes. */
- list_for_each (tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t, ksnr_list);
--
- conn->ksnc_peer = peer;
- atomic_inc (&peer->ksnp_refcount);
- if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
- continue;
-
- ksocknal_associate_route_conn_locked(route, conn);
- break;
- }
-
- conn->ksnc_peer = peer; /* conn takes my ref on peer */
- conn->ksnc_incarnation = incarnation;
-- peer->ksnp_last_alive = jiffies;
-- peer->ksnp_error = 0;
-
- sched = ksocknal_choose_scheduler_locked (irq);
- sched->kss_nconns++;
- conn->ksnc_scheduler = sched;
--
-- /* Set the deadline for the outgoing HELLO to drain */
- conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
-- conn->ksnc_tx_deadline = jiffies +
- ksocknal_data.ksnd_io_timeout * HZ;
- ksocknal_tunables.ksnd_io_timeout * HZ;
- mb(); /* order with adding to peer's conn list */
--
-- list_add (&conn->ksnc_list, &peer->ksnp_conns);
-- atomic_inc (&conn->ksnc_refcount);
-
- sched = ksocknal_choose_scheduler_locked (irq);
- sched->kss_nconns++;
- conn->ksnc_scheduler = sched;
--
-- /* NB my callbacks block while I hold ksnd_global_lock */
-- sock->sk->sk_user_data = conn;
-- sock->sk->sk_data_ready = ksocknal_data_ready;
-- sock->sk->sk_write_space = ksocknal_write_space;
--
-- /* Take all the packets blocking for a connection.
-- * NB, it might be nicer to share these blocked packets among any
- * other connections that are becoming established, however that
- * confuses the normal packet launching operation, which selects a
- * connection and queues the packet on it without needing an
- * exclusive lock on ksnd_global_lock. */
- * other connections that are becoming established. */
-- while (!list_empty (&peer->ksnp_tx_queue)) {
-- tx = list_entry (peer->ksnp_tx_queue.next,
-- ksock_tx_t, tx_list);
--
-- list_del (&tx->tx_list);
-- ksocknal_queue_tx_locked (tx, conn);
-- }
-
- rc = ksocknal_close_stale_conns_locked (peer, incarnation);
-
- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
- rc = ksocknal_close_stale_conns_locked(peer, incarnation);
-- if (rc != 0)
-- CERROR ("Closed %d stale conns to nid "LPX64" ip %d.%d.%d.%d\n",
-- rc, conn->ksnc_peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr));
--
- if (bind_irq) /* irq binding required */
- ksocknal_bind_irq (irq);
- write_unlock_irqrestore (global_lock, flags);
-
- ksocknal_bind_irq (irq);
--
-- /* Call the callbacks right now to get things going. */
- ksocknal_data_ready (sock->sk, 0);
- ksocknal_write_space (sock->sk);
- if (ksocknal_getconnsock(conn) == 0) {
- ksocknal_data_ready (sock->sk, 0);
- ksocknal_write_space (sock->sk);
- ksocknal_putconnsock(conn);
- }
--
- CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64" ip %d.%d.%d.%d\n",
- conn, conn->ksnc_peer->ksnp_nid, HIPQUAD(conn->ksnc_ipaddr));
- CWARN("New conn nid:"LPX64" [type:%d] %u.%u.%u.%u -> %u.%u.%u.%u/%d"
- " incarnation:"LPX64" sched[%d]/%d\n",
- nid, conn->ksnc_type, HIPQUAD(conn->ksnc_myipaddr),
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
- (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
--
-- ksocknal_put_conn (conn);
-- return (0);
-
- failed_2:
- if (!peer->ksnp_closing &&
- list_empty (&peer->ksnp_conns) &&
- list_empty (&peer->ksnp_routes))
- ksocknal_unlink_peer_locked(peer);
- write_unlock_irqrestore(global_lock, flags);
-
- failed_1:
- ksocknal_put_peer (peer);
-
- failed_0:
- PORTAL_FREE (conn, sizeof(*conn));
-
- LASSERT (rc != 0);
- return (rc);
--}
--
--void
--ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
--{
-- /* This just does the immmediate housekeeping, and queues the
-- * connection for the reaper to terminate.
-- * Caller holds ksnd_global_lock exclusively in irq context */
- ksock_peer_t *peer = conn->ksnc_peer;
- ksock_route_t *route;
- ksock_peer_t *peer = conn->ksnc_peer;
- ksock_route_t *route;
- ksock_conn_t *conn2;
- struct list_head *tmp;
--
-- LASSERT (peer->ksnp_error == 0);
-- LASSERT (!conn->ksnc_closing);
-- conn->ksnc_closing = 1;
-- atomic_inc (&ksocknal_data.ksnd_nclosing_conns);
--
- /* ksnd_deathrow_conns takes over peer's ref */
- list_del (&conn->ksnc_list);
-
-- route = conn->ksnc_route;
-- if (route != NULL) {
-- /* dissociate conn from route... */
-- LASSERT (!route->ksnr_deleted);
-- LASSERT ((route->ksnr_connecting & (1 << conn->ksnc_type)) == 0);
-- LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
--
- route->ksnr_connected &= ~(1 << conn->ksnc_type);
- conn2 = NULL;
- list_for_each(tmp, &peer->ksnp_conns) {
- conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn2->ksnc_route == route &&
- conn2->ksnc_type == conn->ksnc_type)
- break;
-
- conn2 = NULL;
- }
- if (conn2 == NULL)
- route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
-- conn->ksnc_route = NULL;
--
-#if 0 /* irrelevent with only eager routes */
-- list_del (&route->ksnr_list); /* make route least favourite */
-- list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
-
-#endif
-- ksocknal_put_route (route); /* drop conn's ref on route */
-- }
-
- /* ksnd_deathrow_conns takes over peer's ref */
- list_del (&conn->ksnc_list);
--
-- if (list_empty (&peer->ksnp_conns)) {
-- /* No more connections to this peer */
--
-- peer->ksnp_error = error; /* stash last conn close reason */
--
-- if (list_empty (&peer->ksnp_routes)) {
-- /* I've just closed last conn belonging to a
-- * non-autoconnecting peer */
-- ksocknal_unlink_peer_locked (peer);
-- }
-- }
--
-- spin_lock (&ksocknal_data.ksnd_reaper_lock);
--
-- list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
-- wake_up (&ksocknal_data.ksnd_reaper_waitq);
--
-- spin_unlock (&ksocknal_data.ksnd_reaper_lock);
--}
--
--void
--ksocknal_terminate_conn (ksock_conn_t *conn)
--{
-- /* This gets called by the reaper (guaranteed thread context) to
-- * disengage the socket from its callbacks and close it.
-- * ksnc_refcount will eventually hit zero, and then the reaper will
-- * destroy it. */
-- unsigned long flags;
-- ksock_peer_t *peer = conn->ksnc_peer;
-- ksock_sched_t *sched = conn->ksnc_scheduler;
-- struct timeval now;
-- time_t then = 0;
-- int notify = 0;
--
-- LASSERT(conn->ksnc_closing);
--
-- /* wake up the scheduler to "send" all remaining packets to /dev/null */
-- spin_lock_irqsave(&sched->kss_lock, flags);
--
-- if (!conn->ksnc_tx_scheduled &&
-- !list_empty(&conn->ksnc_tx_queue)){
-- list_add_tail (&conn->ksnc_tx_list,
-- &sched->kss_tx_conns);
-- /* a closing conn is always ready to tx */
-- conn->ksnc_tx_ready = 1;
-- conn->ksnc_tx_scheduled = 1;
-- /* extra ref for scheduler */
-- atomic_inc (&conn->ksnc_refcount);
--
-- wake_up (&sched->kss_waitq);
-- }
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
--
-- /* serialise with callbacks */
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- /* Remove conn's network callbacks.
-- * NB I _have_ to restore the callback, rather than storing a noop,
-- * since the socket could survive past this module being unloaded!! */
-- conn->ksnc_sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
-- conn->ksnc_sock->sk->sk_write_space = conn->ksnc_saved_write_space;
--
-- /* A callback could be in progress already; they hold a read lock
-- * on ksnd_global_lock (to serialise with me) and NOOP if
-- * sk_user_data is NULL. */
-- conn->ksnc_sock->sk->sk_user_data = NULL;
--
-- /* OK, so this conn may not be completely disengaged from its
-- * scheduler yet, but it _has_ committed to terminate... */
-- conn->ksnc_scheduler->kss_nconns--;
--
-- if (peer->ksnp_error != 0) {
-- /* peer's last conn closed in error */
-- LASSERT (list_empty (&peer->ksnp_conns));
--
-- /* convert peer's last-known-alive timestamp from jiffies */
-- do_gettimeofday (&now);
-- then = now.tv_sec - (jiffies - peer->ksnp_last_alive)/HZ;
-- notify = 1;
-- }
--
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- /* The socket is closed on the final put; either here, or in
-- * ksocknal_{send,recv}msg(). Since we set up the linger2 option
-- * when the connection was established, this will close the socket
-- * immediately, aborting anything buffered in it. Any hung
-- * zero-copy transmits will therefore complete in finite time. */
-- ksocknal_putconnsock (conn);
--
-- if (notify)
-- kpr_notify (&ksocknal_data.ksnd_router, peer->ksnp_nid,
-- 0, then);
--}
--
--void
--ksocknal_destroy_conn (ksock_conn_t *conn)
--{
-- /* Final coup-de-grace of the reaper */
-- CDEBUG (D_NET, "connection %p\n", conn);
--
-- LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
-- LASSERT (conn->ksnc_route == NULL);
-- LASSERT (!conn->ksnc_tx_scheduled);
-- LASSERT (!conn->ksnc_rx_scheduled);
-- LASSERT (list_empty(&conn->ksnc_tx_queue));
--
-- /* complete current receive if any */
-- switch (conn->ksnc_rx_state) {
-- case SOCKNAL_RX_BODY:
-- CERROR("Completing partial receive from "LPX64
-- ", ip %d.%d.%d.%d:%d, with error\n",
-- conn->ksnc_peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
-- lib_finalize (&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_FAIL);
-- break;
-- case SOCKNAL_RX_BODY_FWD:
-- ksocknal_fmb_callback (conn->ksnc_cookie, -ECONNABORTED);
-- break;
-- case SOCKNAL_RX_HEADER:
-- case SOCKNAL_RX_SLOP:
-- break;
-- default:
-- LBUG ();
-- break;
-- }
--
-- ksocknal_put_peer (conn->ksnc_peer);
--
-- PORTAL_FREE (conn, sizeof (*conn));
-- atomic_dec (&ksocknal_data.ksnd_nclosing_conns);
--}
--
--void
--ksocknal_put_conn (ksock_conn_t *conn)
--{
-- unsigned long flags;
--
-- CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
-- conn, conn->ksnc_peer->ksnp_nid,
-- atomic_read (&conn->ksnc_refcount));
--
-- LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
-- if (!atomic_dec_and_test (&conn->ksnc_refcount))
-- return;
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
--
-- list_add (&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
-- wake_up (&ksocknal_data.ksnd_reaper_waitq);
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
--}
--
--int
--ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
--{
-- ksock_conn_t *conn;
-- struct list_head *ctmp;
-- struct list_head *cnxt;
-- int count = 0;
--
-- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
-- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
--
-- if (ipaddr == 0 ||
-- conn->ksnc_ipaddr == ipaddr) {
-- count++;
-- ksocknal_close_conn_locked (conn, why);
-- }
-- }
--
-- return (count);
--}
--
--int
--ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
--{
-- ksock_conn_t *conn;
-- struct list_head *ctmp;
-- struct list_head *cnxt;
-- int count = 0;
--
-- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
-- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
--
-- if (conn->ksnc_incarnation == incarnation)
-- continue;
-
- CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d "
- "incarnation:"LPX64"("LPX64")\n",
- peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
- conn->ksnc_incarnation, incarnation);
--
-- count++;
-- ksocknal_close_conn_locked (conn, -ESTALE);
-- }
--
-- return (count);
--}
--
--int
--ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
--{
-- ksock_peer_t *peer = conn->ksnc_peer;
-- __u32 ipaddr = conn->ksnc_ipaddr;
-- unsigned long flags;
-- int count;
--
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
--
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- return (count);
--}
--
--int
--ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
--{
-- unsigned long flags;
-- ksock_peer_t *peer;
-- struct list_head *ptmp;
-- struct list_head *pnxt;
-- int lo;
-- int hi;
-- int i;
-- int count = 0;
--
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- if (nid != PTL_NID_ANY)
-- lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
-- else {
-- lo = 0;
-- hi = ksocknal_data.ksnd_peer_hash_size - 1;
-- }
--
-- for (i = lo; i <= hi; i++) {
-- list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
--
-- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
--
-- if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid))
-- continue;
--
-- count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
-- }
-- }
--
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- /* wildcards always succeed */
-- if (nid == PTL_NID_ANY || ipaddr == 0)
-- return (0);
--
-- return (count == 0 ? -ENOENT : 0);
--}
--
--void
--ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive)
--{
-- /* The router is telling me she's been notified of a change in
-- * gateway state.... */
--
-- CDEBUG (D_NET, "gw "LPX64" %s\n", gw_nid, alive ? "up" : "down");
--
-- if (!alive) {
-- /* If the gateway crashed, close all open connections... */
-- ksocknal_close_matching_conns (gw_nid, 0);
-- return;
-- }
--
-- /* ...otherwise do nothing. We can only establish new connections
-- * if we have autroutes, and these connect on demand. */
--}
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--struct tcp_opt *sock2tcp_opt(struct sock *sk)
--{
-- return &(sk->tp_pinfo.af_tcp);
--}
--#else
--struct tcp_opt *sock2tcp_opt(struct sock *sk)
--{
-- struct tcp_sock *s = (struct tcp_sock *)sk;
-- return &s->tcp;
--}
--#endif
--
--void
--ksocknal_push_conn (ksock_conn_t *conn)
--{
-- struct sock *sk;
-- struct tcp_opt *tp;
-- int nonagle;
-- int val = 1;
-- int rc;
-- mm_segment_t oldmm;
--
-- rc = ksocknal_getconnsock (conn);
-- if (rc != 0) /* being shut down */
-- return;
--
-- sk = conn->ksnc_sock->sk;
-- tp = sock2tcp_opt(sk);
--
-- lock_sock (sk);
-- nonagle = tp->nonagle;
-- tp->nonagle = 1;
-- release_sock (sk);
--
-- oldmm = get_fs ();
-- set_fs (KERNEL_DS);
--
-- rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
-- (char *)&val, sizeof (val));
-- LASSERT (rc == 0);
--
-- set_fs (oldmm);
--
-- lock_sock (sk);
-- tp->nonagle = nonagle;
-- release_sock (sk);
--
-- ksocknal_putconnsock (conn);
--}
--
--void
--ksocknal_push_peer (ksock_peer_t *peer)
--{
-- int index;
-- int i;
-- struct list_head *tmp;
-- ksock_conn_t *conn;
--
-- for (index = 0; ; index++) {
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- i = 0;
-- conn = NULL;
--
-- list_for_each (tmp, &peer->ksnp_conns) {
-- if (i++ == index) {
-- conn = list_entry (tmp, ksock_conn_t, ksnc_list);
-- atomic_inc (&conn->ksnc_refcount);
-- break;
-- }
-- }
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--
-- if (conn == NULL)
-- break;
--
-- ksocknal_push_conn (conn);
-- ksocknal_put_conn (conn);
-- }
--}
--
--int
--ksocknal_push (ptl_nid_t nid)
--{
-- ksock_peer_t *peer;
-- struct list_head *tmp;
-- int index;
-- int i;
-- int j;
-- int rc = -ENOENT;
--
-- if (nid != PTL_NID_ANY) {
-- peer = ksocknal_get_peer (nid);
--
-- if (peer != NULL) {
-- rc = 0;
-- ksocknal_push_peer (peer);
-- ksocknal_put_peer (peer);
-- }
-- return (rc);
-- }
--
-- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-- for (j = 0; ; j++) {
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- index = 0;
-- peer = NULL;
--
-- list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
-- if (index++ == j) {
-- peer = list_entry(tmp, ksock_peer_t,
-- ksnp_list);
-- atomic_inc (&peer->ksnp_refcount);
-- break;
-- }
-- }
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--
-- if (peer != NULL) {
-- rc = 0;
-- ksocknal_push_peer (peer);
-- ksocknal_put_peer (peer);
-- }
-- }
--
-- }
--
-- return (rc);
--}
--
--int
- ksocknal_cmd(struct portals_cfg *pcfg, void * private)
-ksocknal_add_interface(__u32 ipaddress, __u32 netmask)
--{
- int rc = -EINVAL;
- unsigned long flags;
- ksock_interface_t *iface;
- int rc;
- int i;
- int j;
- struct list_head *ptmp;
- ksock_peer_t *peer;
- struct list_head *rtmp;
- ksock_route_t *route;
--
- LASSERT (pcfg != NULL);
- if (ipaddress == 0 ||
- netmask == 0)
- return (-EINVAL);
-
- write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
-
- iface = ksocknal_ip2iface(ipaddress);
- if (iface != NULL) {
- /* silently ignore dups */
- rc = 0;
- } else if (ksocknal_data.ksnd_ninterfaces == SOCKNAL_MAX_INTERFACES) {
- rc = -ENOSPC;
- } else {
- iface = &ksocknal_data.ksnd_interfaces[ksocknal_data.ksnd_ninterfaces++];
-
- iface->ksni_ipaddr = ipaddress;
- iface->ksni_netmask = netmask;
- iface->ksni_nroutes = 0;
- iface->ksni_npeers = 0;
-
- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
- list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
- peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
-
- for (j = 0; i < peer->ksnp_n_passive_ips; j++)
- if (peer->ksnp_passive_ips[j] == ipaddress)
- iface->ksni_npeers++;
-
- list_for_each(rtmp, &peer->ksnp_routes) {
- route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_myipaddr == ipaddress)
- iface->ksni_nroutes++;
- }
- }
- }
-
- rc = 0;
- /* NB only new connections will pay attention to the new interface! */
- }
-
- write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
-
- return (rc);
-}
-
-void
-ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
-{
- struct list_head *tmp;
- struct list_head *nxt;
- ksock_route_t *route;
- ksock_conn_t *conn;
- int i;
- int j;
-
- for (i = 0; i < peer->ksnp_n_passive_ips; i++)
- if (peer->ksnp_passive_ips[i] == ipaddr) {
- for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
- peer->ksnp_passive_ips[j-1] =
- peer->ksnp_passive_ips[j];
- peer->ksnp_n_passive_ips--;
- break;
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
- route = list_entry (tmp, ksock_route_t, ksnr_list);
-
- if (route->ksnr_myipaddr != ipaddr)
- continue;
-
- if (route->ksnr_share_count != 0) {
- /* Manually created; keep, but unbind */
- route->ksnr_myipaddr = 0;
- } else {
- ksocknal_del_route_locked(route);
- }
- }
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
- conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_myipaddr == ipaddr)
- ksocknal_close_conn_locked (conn, 0);
- }
-}
-
-int
-ksocknal_del_interface(__u32 ipaddress)
-{
- int rc = -ENOENT;
- unsigned long flags;
- struct list_head *tmp;
- struct list_head *nxt;
- ksock_peer_t *peer;
- __u32 this_ip;
- int i;
- int j;
-
- write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
-
- for (i = 0; i < ksocknal_data.ksnd_ninterfaces; i++) {
- this_ip = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr;
-
- if (!(ipaddress == 0 ||
- ipaddress == this_ip))
- continue;
-
- rc = 0;
-
- for (j = i+1; j < ksocknal_data.ksnd_ninterfaces; j++)
- ksocknal_data.ksnd_interfaces[j-1] =
- ksocknal_data.ksnd_interfaces[j];
-
- ksocknal_data.ksnd_ninterfaces--;
-
- for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
- list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
- peer = list_entry(tmp, ksock_peer_t, ksnp_list);
-
- ksocknal_peer_del_interface_locked(peer, this_ip);
- }
- }
- }
-
- write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
-
- return (rc);
-}
-
-int
-ksocknal_cmd(struct portals_cfg *pcfg, void * private)
-{
- int rc;
--
-- switch(pcfg->pcfg_command) {
- case NAL_CMD_GET_AUTOCONN: {
- ksock_route_t *route = ksocknal_get_route_by_idx (pcfg->pcfg_count);
- case NAL_CMD_GET_INTERFACE: {
- ksock_interface_t *iface;
--
- if (route == NULL)
- read_lock (&ksocknal_data.ksnd_global_lock);
-
- if (pcfg->pcfg_count < 0 ||
- pcfg->pcfg_count >= ksocknal_data.ksnd_ninterfaces) {
-- rc = -ENOENT;
- else {
- } else {
-- rc = 0;
- pcfg->pcfg_nid = route->ksnr_peer->ksnp_nid;
- pcfg->pcfg_id = route->ksnr_ipaddr;
- pcfg->pcfg_misc = route->ksnr_port;
- pcfg->pcfg_count = route->ksnr_conn_count;
- pcfg->pcfg_size = route->ksnr_buffer_size;
- pcfg->pcfg_wait = route->ksnr_sharecount;
- pcfg->pcfg_flags = (route->ksnr_irq_affinity ? 2 : 0) |
- (route->ksnr_eager ? 4 : 0);
- ksocknal_put_route (route);
- iface = &ksocknal_data.ksnd_interfaces[pcfg->pcfg_count];
-
- pcfg->pcfg_id = iface->ksni_ipaddr;
- pcfg->pcfg_misc = iface->ksni_netmask;
- pcfg->pcfg_fd = iface->ksni_npeers;
- pcfg->pcfg_count = iface->ksni_nroutes;
-- }
-
- read_unlock (&ksocknal_data.ksnd_global_lock);
-- break;
-- }
- case NAL_CMD_ADD_AUTOCONN: {
- rc = ksocknal_add_route (pcfg->pcfg_nid, pcfg->pcfg_id,
- pcfg->pcfg_misc, pcfg->pcfg_size,
- (pcfg->pcfg_flags & 0x02) != 0,
- (pcfg->pcfg_flags & 0x04) != 0,
- (pcfg->pcfg_flags & 0x08) != 0);
- case NAL_CMD_ADD_INTERFACE: {
- rc = ksocknal_add_interface(pcfg->pcfg_id, /* IP address */
- pcfg->pcfg_misc); /* net mask */
-- break;
-- }
- case NAL_CMD_DEL_AUTOCONN: {
- rc = ksocknal_del_route (pcfg->pcfg_nid, pcfg->pcfg_id,
- (pcfg->pcfg_flags & 1) != 0,
- (pcfg->pcfg_flags & 2) != 0);
- case NAL_CMD_DEL_INTERFACE: {
- rc = ksocknal_del_interface(pcfg->pcfg_id); /* IP address */
- break;
- }
- case NAL_CMD_GET_PEER: {
- ptl_nid_t nid = 0;
- __u32 myip = 0;
- __u32 ip = 0;
- int port = 0;
- int conn_count = 0;
- int share_count = 0;
-
- rc = ksocknal_get_peer_info(pcfg->pcfg_count, &nid,
- &myip, &ip, &port,
- &conn_count, &share_count);
- pcfg->pcfg_nid = nid;
- pcfg->pcfg_size = myip;
- pcfg->pcfg_id = ip;
- pcfg->pcfg_misc = port;
- pcfg->pcfg_count = conn_count;
- pcfg->pcfg_wait = share_count;
- break;
- }
- case NAL_CMD_ADD_PEER: {
- rc = ksocknal_add_peer (pcfg->pcfg_nid,
- pcfg->pcfg_id, /* IP */
- pcfg->pcfg_misc); /* port */
- break;
- }
- case NAL_CMD_DEL_PEER: {
- rc = ksocknal_del_peer (pcfg->pcfg_nid,
- pcfg->pcfg_id, /* IP */
- pcfg->pcfg_flags); /* single_share? */
-- break;
-- }
-- case NAL_CMD_GET_CONN: {
-- ksock_conn_t *conn = ksocknal_get_conn_by_idx (pcfg->pcfg_count);
--
-- if (conn == NULL)
-- rc = -ENOENT;
-- else {
- int txmem;
- int rxmem;
- int nagle;
-
- ksocknal_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
-- rc = 0;
- pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid;
- pcfg->pcfg_id = conn->ksnc_ipaddr;
- pcfg->pcfg_misc = conn->ksnc_port;
- pcfg->pcfg_flags = conn->ksnc_type;
- pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid;
- pcfg->pcfg_id = conn->ksnc_ipaddr;
- pcfg->pcfg_misc = conn->ksnc_port;
- pcfg->pcfg_fd = conn->ksnc_myipaddr;
- pcfg->pcfg_flags = conn->ksnc_type;
- pcfg->pcfg_gw_nal = conn->ksnc_scheduler -
- ksocknal_data.ksnd_schedulers;
- pcfg->pcfg_count = txmem;
- pcfg->pcfg_size = rxmem;
- pcfg->pcfg_wait = nagle;
-- ksocknal_put_conn (conn);
-- }
-- break;
-- }
-- case NAL_CMD_REGISTER_PEER_FD: {
-- struct socket *sock = sockfd_lookup (pcfg->pcfg_fd, &rc);
-- int type = pcfg->pcfg_misc;
--
-- if (sock == NULL)
-- break;
--
-- switch (type) {
-- case SOCKNAL_CONN_NONE:
-- case SOCKNAL_CONN_ANY:
-- case SOCKNAL_CONN_CONTROL:
-- case SOCKNAL_CONN_BULK_IN:
-- case SOCKNAL_CONN_BULK_OUT:
- rc = ksocknal_create_conn(NULL, sock, pcfg->pcfg_flags, type);
- rc = ksocknal_create_conn(NULL, sock, type);
- break;
-- default:
- rc = -EINVAL;
-- break;
-- }
-- if (rc != 0)
-- fput (sock->file);
-- break;
-- }
-- case NAL_CMD_CLOSE_CONNECTION: {
-- rc = ksocknal_close_matching_conns (pcfg->pcfg_nid,
-- pcfg->pcfg_id);
-- break;
-- }
-- case NAL_CMD_REGISTER_MYNID: {
-- rc = ksocknal_set_mynid (pcfg->pcfg_nid);
-- break;
-- }
-- case NAL_CMD_PUSH_CONNECTION: {
-- rc = ksocknal_push (pcfg->pcfg_nid);
-- break;
-- }
- default:
- rc = -EINVAL;
- break;
-- }
--
-- return rc;
--}
--
--void
--ksocknal_free_fmbs (ksock_fmb_pool_t *p)
--{
-- int npages = p->fmp_buff_pages;
-- ksock_fmb_t *fmb;
-- int i;
--
-- LASSERT (list_empty(&p->fmp_blocked_conns));
-- LASSERT (p->fmp_nactive_fmbs == 0);
--
-- while (!list_empty(&p->fmp_idle_fmbs)) {
--
-- fmb = list_entry(p->fmp_idle_fmbs.next,
-- ksock_fmb_t, fmb_list);
--
-- for (i = 0; i < npages; i++)
-- if (fmb->fmb_kiov[i].kiov_page != NULL)
-- __free_page(fmb->fmb_kiov[i].kiov_page);
--
-- list_del(&fmb->fmb_list);
-- PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
-- }
--}
--
--void
--ksocknal_free_buffers (void)
--{
-- ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp);
-- ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp);
--
-- LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0);
--
-- if (ksocknal_data.ksnd_schedulers != NULL)
-- PORTAL_FREE (ksocknal_data.ksnd_schedulers,
- sizeof (ksock_sched_t) * SOCKNAL_N_SCHED);
- sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
--
-- PORTAL_FREE (ksocknal_data.ksnd_peers,
-- sizeof (struct list_head) *
-- ksocknal_data.ksnd_peer_hash_size);
--}
--
--void
- ksocknal_module_fini (void)
-ksocknal_api_shutdown (nal_t *nal)
--{
- int i;
- ksock_sched_t *sched;
- int i;
-
- if (nal->nal_refct != 0) {
- /* This module got the first ref */
- PORTAL_MODULE_UNUSE;
- return;
- }
--
-- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-- atomic_read (&portal_kmemory));
-
- LASSERT(nal == &ksocknal_api);
--
-- switch (ksocknal_data.ksnd_init) {
-- default:
-- LASSERT (0);
--
-- case SOCKNAL_INIT_ALL:
- #if CONFIG_SYSCTL
- if (ksocknal_data.ksnd_sysctl != NULL)
- unregister_sysctl_table (ksocknal_data.ksnd_sysctl);
- #endif
- kportal_nal_unregister(SOCKNAL);
- PORTAL_SYMBOL_UNREGISTER (ksocknal_ni);
- libcfs_nal_cmd_unregister(SOCKNAL);
-
- ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB;
-- /* fall through */
--
- case SOCKNAL_INIT_PTL:
- case SOCKNAL_INIT_LIB:
-- /* No more calls to ksocknal_cmd() to create new
-- * autoroutes/connections since we're being unloaded. */
- PtlNIFini(ksocknal_ni);
--
- /* Delete all autoroute entries */
- ksocknal_del_route(PTL_NID_ANY, 0, 0, 0);
- /* Delete all peers */
- ksocknal_del_peer(PTL_NID_ANY, 0, 0);
--
- /* Delete all connections */
- ksocknal_close_matching_conns (PTL_NID_ANY, 0);
-
-- /* Wait for all peer state to clean up */
-- i = 2;
-- while (atomic_read (&ksocknal_data.ksnd_npeers) != 0) {
-- i++;
-- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-- "waiting for %d peers to disconnect\n",
-- atomic_read (&ksocknal_data.ksnd_npeers));
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (HZ);
-- }
--
-- /* Tell lib we've stopped calling into her. */
-- lib_fini(&ksocknal_lib);
-
- ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-- /* fall through */
--
-- case SOCKNAL_INIT_DATA:
- /* Module refcount only gets to zero when all peers
- * have been closed so all lists must be empty */
-- LASSERT (atomic_read (&ksocknal_data.ksnd_npeers) == 0);
-- LASSERT (ksocknal_data.ksnd_peers != NULL);
-- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-- LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
-- }
-- LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
-- LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
-- LASSERT (list_empty (&ksocknal_data.ksnd_autoconnectd_routes));
-- LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns));
-- LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns));
--
-- if (ksocknal_data.ksnd_schedulers != NULL)
- for (i = 0; i < SOCKNAL_N_SCHED; i++) {
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-- ksock_sched_t *kss =
-- &ksocknal_data.ksnd_schedulers[i];
--
-- LASSERT (list_empty (&kss->kss_tx_conns));
-- LASSERT (list_empty (&kss->kss_rx_conns));
-- LASSERT (kss->kss_nconns == 0);
-- }
--
-- /* stop router calling me */
-- kpr_shutdown (&ksocknal_data.ksnd_router);
--
-- /* flag threads to terminate; wake and wait for them to die */
-- ksocknal_data.ksnd_shuttingdown = 1;
-- wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
-- wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
--
- for (i = 0; i < SOCKNAL_N_SCHED; i++)
- wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq);
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
- sched = &ksocknal_data.ksnd_schedulers[i];
- wake_up_all(&sched->kss_waitq);
- }
--
- while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) {
- CDEBUG (D_NET, "waitinf for %d threads to terminate\n",
- atomic_read (&ksocknal_data.ksnd_nthreads));
- i = 4;
- read_lock(&ksocknal_data.ksnd_global_lock);
- while (ksocknal_data.ksnd_nthreads != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d threads to terminate\n",
- ksocknal_data.ksnd_nthreads);
- read_unlock(&ksocknal_data.ksnd_global_lock);
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (HZ);
- read_lock(&ksocknal_data.ksnd_global_lock);
-- }
- read_unlock(&ksocknal_data.ksnd_global_lock);
--
-- kpr_deregister (&ksocknal_data.ksnd_router);
--
-- ksocknal_free_buffers();
-
- ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
-- /* fall through */
--
-- case SOCKNAL_INIT_NOTHING:
-- break;
-- }
--
-- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-- atomic_read (&portal_kmemory));
--
-- printk(KERN_INFO "Lustre: Routing socket NAL unloaded (final mem %d)\n",
-- atomic_read(&portal_kmemory));
--}
--
--
- void __init
-void
--ksocknal_init_incarnation (void)
--{
-- struct timeval tv;
--
-- /* The incarnation number is the time this module loaded and it
-- * identifies this particular instance of the socknal. Hopefully
-- * we won't be able to reboot more frequently than 1MHz for the
-- * forseeable future :) */
--
-- do_gettimeofday(&tv);
--
-- ksocknal_data.ksnd_incarnation =
-- (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
--}
--
- int __init
- ksocknal_module_init (void)
-int
-ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
- int pkmem = atomic_read(&portal_kmemory);
- int rc;
- int i;
- int j;
- ptl_process_id_t process_id;
- int pkmem = atomic_read(&portal_kmemory);
- int rc;
- int i;
- int j;
--
- /* packet descriptor must fit in a router descriptor's scratchpad */
- LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
- /* the following must be sizeof(int) for proc_dointvec() */
- LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int));
- LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int));
- /* check ksnr_connected/connecting field large enough */
- LASSERT(SOCKNAL_CONN_NTYPES <= 4);
-
- LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
- LASSERT (nal == &ksocknal_api);
--
- ksocknal_api.forward = ksocknal_api_forward;
- ksocknal_api.shutdown = ksocknal_api_shutdown;
- ksocknal_api.yield = ksocknal_api_yield;
- ksocknal_api.validate = NULL; /* our api validate is a NOOP */
- ksocknal_api.lock = ksocknal_api_lock;
- ksocknal_api.unlock = ksocknal_api_unlock;
- ksocknal_api.nal_data = &ksocknal_data;
- if (nal->nal_refct != 0) {
- if (actual_limits != NULL)
- *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits;
- /* This module got the first ref */
- PORTAL_MODULE_USE;
- return (PTL_OK);
- }
--
- ksocknal_lib.nal_data = &ksocknal_data;
- LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
--
-- memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
--
- ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT;
- ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK;
- ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS;
- ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK;
- #if SOCKNAL_ZC
- ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG;
- #endif
-- ksocknal_init_incarnation();
--
-- ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
-- PORTAL_ALLOC (ksocknal_data.ksnd_peers,
-- sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
-- if (ksocknal_data.ksnd_peers == NULL)
-- return (-ENOMEM);
--
-- for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
-- INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
--
-- rwlock_init(&ksocknal_data.ksnd_global_lock);
-
- ksocknal_data.ksnd_nal_cb = &ksocknal_lib;
- spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock);
--
-- spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
-- INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
-- INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
-- ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
--
-- spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
-- INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
-- INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
-- ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
--
-- spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
-- INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
-- INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
-- INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
-- init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
--
-- spin_lock_init (&ksocknal_data.ksnd_autoconnectd_lock);
-- INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes);
-- init_waitqueue_head(&ksocknal_data.ksnd_autoconnectd_waitq);
--
-- /* NB memset above zeros whole of ksocknal_data, including
-- * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
--
-- /* flag lists/ptrs/locks initialised */
-- ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
--
- ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
-- PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
- sizeof(ksock_sched_t) * SOCKNAL_N_SCHED);
- sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
-- if (ksocknal_data.ksnd_schedulers == NULL) {
- ksocknal_module_fini ();
- ksocknal_api_shutdown (nal);
-- return (-ENOMEM);
-- }
--
- for (i = 0; i < SOCKNAL_N_SCHED; i++) {
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-- ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
--
-- spin_lock_init (&kss->kss_lock);
-- INIT_LIST_HEAD (&kss->kss_rx_conns);
-- INIT_LIST_HEAD (&kss->kss_tx_conns);
--#if SOCKNAL_ZC
-- INIT_LIST_HEAD (&kss->kss_zctxdone_list);
--#endif
-- init_waitqueue_head (&kss->kss_waitq);
-- }
--
- rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni);
- if (rc != 0) {
- CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
- ksocknal_module_fini ();
- /* NB we have to wait to be told our true NID... */
- process_id.pid = requested_pid;
- process_id.nid = 0;
-
- rc = lib_init(&ksocknal_lib, nal, process_id,
- requested_limits, actual_limits);
- if (rc != PTL_OK) {
- CERROR("lib_init failed: error %d\n", rc);
- ksocknal_api_shutdown (nal);
-- return (rc);
-- }
- PtlNIDebug(ksocknal_ni, ~0);
--
- ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called
- ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called
--
- for (i = 0; i < SOCKNAL_N_SCHED; i++) {
- for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-- rc = ksocknal_thread_start (ksocknal_scheduler,
-- &ksocknal_data.ksnd_schedulers[i]);
-- if (rc != 0) {
-- CERROR("Can't spawn socknal scheduler[%d]: %d\n",
-- i, rc);
- ksocknal_module_fini ();
- ksocknal_api_shutdown (nal);
-- return (rc);
-- }
-- }
--
-- for (i = 0; i < SOCKNAL_N_AUTOCONNECTD; i++) {
-- rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i));
-- if (rc != 0) {
-- CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
- ksocknal_module_fini ();
- ksocknal_api_shutdown (nal);
-- return (rc);
-- }
-- }
--
-- rc = ksocknal_thread_start (ksocknal_reaper, NULL);
-- if (rc != 0) {
-- CERROR ("Can't spawn socknal reaper: %d\n", rc);
- ksocknal_module_fini ();
- ksocknal_api_shutdown (nal);
-- return (rc);
-- }
--
-- rc = kpr_register(&ksocknal_data.ksnd_router,
-- &ksocknal_router_interface);
-- if (rc != 0) {
-- CDEBUG(D_NET, "Can't initialise routing interface "
-- "(rc = %d): not routing\n", rc);
-- } else {
- /* Only allocate forwarding buffers if I'm on a gateway */
- /* Only allocate forwarding buffers if there's a router */
--
-- for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
-- SOCKNAL_LARGE_FWD_NMSGS); i++) {
-- ksock_fmb_t *fmb;
-- ksock_fmb_pool_t *pool;
--
--
-- if (i < SOCKNAL_SMALL_FWD_NMSGS)
-- pool = &ksocknal_data.ksnd_small_fmp;
-- else
-- pool = &ksocknal_data.ksnd_large_fmp;
--
-- PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t,
-- fmb_kiov[pool->fmp_buff_pages]));
-- if (fmb == NULL) {
- ksocknal_module_fini();
- ksocknal_api_shutdown(nal);
-- return (-ENOMEM);
-- }
--
-- fmb->fmb_pool = pool;
--
-- for (j = 0; j < pool->fmp_buff_pages; j++) {
-- fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
--
-- if (fmb->fmb_kiov[j].kiov_page == NULL) {
- ksocknal_module_fini ();
- ksocknal_api_shutdown (nal);
-- return (-ENOMEM);
-- }
--
-- LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
-- }
--
-- list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
-- }
-- }
--
- rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL);
- rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL);
-- if (rc != 0) {
-- CERROR ("Can't initialise command interface (rc = %d)\n", rc);
- ksocknal_module_fini ();
- ksocknal_api_shutdown (nal);
-- return (rc);
-- }
-
- PORTAL_SYMBOL_REGISTER(ksocknal_ni);
--
- #ifdef CONFIG_SYSCTL
- /* Press on regardless even if registering sysctl doesn't work */
- ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0);
- #endif
-- /* flag everything initialised */
-- ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
--
-- printk(KERN_INFO "Lustre: Routing socket NAL loaded "
- "(Routing %s, initial mem %d)\n",
- "(Routing %s, initial mem %d, incarnation "LPX64")\n",
-- kpr_routing (&ksocknal_data.ksnd_router) ?
- "enabled" : "disabled", pkmem);
- "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation);
-
- return (0);
-}
-
-void __exit
-ksocknal_module_fini (void)
-{
-#ifdef CONFIG_SYSCTL
- if (ksocknal_tunables.ksnd_sysctl != NULL)
- unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
-#endif
- PtlNIFini(ksocknal_ni);
-
- ptl_unregister_nal(SOCKNAL);
-}
-
-int __init
-ksocknal_module_init (void)
-{
- int rc;
-
- /* packet descriptor must fit in a router descriptor's scratchpad */
- LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
- /* the following must be sizeof(int) for proc_dointvec() */
- LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_buffer_size) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_nagle) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_idle) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_count) == sizeof (int));
- LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_intvl) == sizeof (int));
-#if CPU_AFFINITY
- LASSERT(sizeof (ksocknal_tunables.ksnd_irq_affinity) == sizeof (int));
-#endif
-#if SOCKNAL_ZC
- LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int));
-#endif
- /* check ksnr_connected/connecting field large enough */
- LASSERT(SOCKNAL_CONN_NTYPES <= 4);
-
- ksocknal_api.nal_ni_init = ksocknal_api_startup;
- ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
-
- /* Initialise dynamic tunables to defaults once only */
- ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT;
- ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK;
- ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS;
- ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK;
- ksocknal_tunables.ksnd_buffer_size = SOCKNAL_BUFFER_SIZE;
- ksocknal_tunables.ksnd_nagle = SOCKNAL_NAGLE;
- ksocknal_tunables.ksnd_keepalive_idle = SOCKNAL_KEEPALIVE_IDLE;
- ksocknal_tunables.ksnd_keepalive_count = SOCKNAL_KEEPALIVE_COUNT;
- ksocknal_tunables.ksnd_keepalive_intvl = SOCKNAL_KEEPALIVE_INTVL;
-#if CPU_AFFINITY
- ksocknal_tunables.ksnd_irq_affinity = SOCKNAL_IRQ_AFFINITY;
-#endif
-#if SOCKNAL_ZC
- ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG;
-#endif
-
- rc = ptl_register_nal(SOCKNAL, &ksocknal_api);
- if (rc != PTL_OK) {
- CERROR("Can't register SOCKNAL: %d\n", rc);
- return (-ENOMEM); /* or something... */
- }
--
- /* Pure gateways want the NAL started up at module load time... */
- rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
- ptl_unregister_nal(SOCKNAL);
- return (-ENODEV);
- }
-
-#ifdef CONFIG_SYSCTL
- /* Press on regardless even if registering sysctl doesn't work */
- ksocknal_tunables.ksnd_sysctl =
- register_sysctl_table (ksocknal_top_ctl_table, 0);
-#endif
-- return (0);
--}
--
--MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
--MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
--MODULE_LICENSE("GPL");
--
--module_init(ksocknal_module_init);
--module_exit(ksocknal_module_fini);
--
- EXPORT_SYMBOL (ksocknal_ni);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- * Author: Zach Brown <zab@zabbo.net>
-- * Author: Peter J. Braam <braam@clusterfs.com>
-- * Author: Phil Schwan <phil@clusterfs.com>
-- * Author: Eric Barton <eric@bartonsoftware.com>
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#define DEBUG_PORTAL_ALLOC
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include <linux/config.h>
--#include <linux/module.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/stat.h>
--#include <linux/errno.h>
--#include <linux/smp_lock.h>
--#include <linux/unistd.h>
--#include <net/sock.h>
--#include <net/tcp.h>
--#include <linux/uio.h>
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <asm/irq.h>
--
--#include <linux/init.h>
--#include <linux/fs.h>
--#include <linux/file.h>
--#include <linux/stat.h>
--#include <linux/list.h>
--#include <linux/kmod.h>
--#include <linux/sysctl.h>
--#include <asm/uaccess.h>
--#include <asm/segment.h>
--#include <asm/div64.h>
--
--#define DEBUG_SUBSYSTEM S_SOCKNAL
--
--#include <linux/kp30.h>
--#include <linux/portals_compat25.h>
--#include <linux/kpr.h>
--#include <portals/p30.h>
--#include <portals/lib-p30.h>
--#include <portals/nal.h>
--#include <portals/socknal.h>
-#include <linux/lustre_idl.h>
--
- #define SOCKNAL_N_SCHED ksocknal_nsched() /* # socknal schedulers */
-#include <linux/lustre_idl.h>
--#define SOCKNAL_N_AUTOCONNECTD 4 /* # socknal autoconnect daemons */
--
--#define SOCKNAL_MIN_RECONNECT_INTERVAL HZ /* first failed connection retry... */
--#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
--
--/* default vals for runtime tunables */
--#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */
--#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */
--#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */
--#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
--#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */
- #define SOCKNAL_USE_KEEPALIVES 0 /* use tcp/ip keepalive? */
-#define SOCKNAL_BUFFER_SIZE (8<<20) /* default socket buffer size */
-#define SOCKNAL_NAGLE 0 /* enable/disable NAGLE? */
-#define SOCKNAL_IRQ_AFFINITY 1 /* enable/disable IRQ affinity? */
-#define SOCKNAL_KEEPALIVE_IDLE 0 /* # seconds idle before 1st probe */
-#define SOCKNAL_KEEPALIVE_COUNT 10 /* # unanswered probes to determine peer death */
-#define SOCKNAL_KEEPALIVE_INTVL 1 /* seconds between probes */
--
--#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
--
--#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
--#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
--
--#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */
--
--#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
-- /* # pages in a large message fwd buffer */
--
--#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
--#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */
-
-#define SOCKNAL_MAX_INTERFACES 16 /* Largest number of interfaces we bind */
-
-#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */
--
--#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10)
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
-# define sk_allocation allocation
--# define sk_data_ready data_ready
--# define sk_write_space write_space
--# define sk_user_data user_data
--# define sk_prot prot
--# define sk_sndbuf sndbuf
--# define sk_socket socket
--#endif
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
--# define sk_wmem_queued wmem_queued
-# define sk_err err
--#endif
--
--typedef struct /* pool of forwarding buffers */
--{
-- spinlock_t fmp_lock; /* serialise */
-- struct list_head fmp_idle_fmbs; /* free buffers */
-- struct list_head fmp_blocked_conns; /* connections waiting for a buffer */
-- int fmp_nactive_fmbs; /* # buffers in use */
-- int fmp_buff_pages; /* # pages per buffer */
--} ksock_fmb_pool_t;
--
--
--typedef struct /* per scheduler state */
--{
-- spinlock_t kss_lock; /* serialise */
-- struct list_head kss_rx_conns; /* conn waiting to be read */
-- struct list_head kss_tx_conns; /* conn waiting to be written */
--#if SOCKNAL_ZC
-- struct list_head kss_zctxdone_list; /* completed ZC transmits */
--#endif
-- wait_queue_head_t kss_waitq; /* where scheduler sleeps */
-- int kss_nconns; /* # connections assigned to this scheduler */
--} ksock_sched_t;
--
- typedef struct {
-typedef struct
-{
-- int ksni_valid:1; /* been set yet? */
-- int ksni_bound:1; /* bound to a cpu yet? */
-- int ksni_sched:6; /* which scheduler (assumes < 64) */
--} ksock_irqinfo_t;
--
- typedef struct {
- int ksnd_init; /* initialisation state */
-typedef struct
-{
- __u32 ksni_ipaddr; /* interface's IP address */
- __u32 ksni_netmask; /* interface's network mask */
- int ksni_nroutes; /* # routes using (active) */
- int ksni_npeers; /* # peers using (passive) */
-} ksock_interface_t;
-
-typedef struct
-{
-- int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */
-- int ksnd_eager_ack; /* make TCP ack eagerly? */
-- int ksnd_typed_conns; /* drive sockets by type? */
-- int ksnd_min_bulk; /* smallest "large" message */
- int ksnd_buffer_size; /* socket buffer size */
- int ksnd_nagle; /* enable NAGLE? */
- int ksnd_irq_affinity; /* enable IRQ affinity? */
- int ksnd_keepalive_idle; /* # idle secs before 1st probe */
- int ksnd_keepalive_count; /* # probes */
- int ksnd_keepalive_intvl; /* time between probes */
--#if SOCKNAL_ZC
-- unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */
--#endif
-- struct ctl_table_header *ksnd_sysctl; /* sysctl interface */
-} ksock_tunables_t;
-
-typedef struct
-{
- int ksnd_init; /* initialisation state */
-- __u64 ksnd_incarnation; /* my epoch */
--
-- rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */
-- struct list_head *ksnd_peers; /* hash table of all my known peers */
-- int ksnd_peer_hash_size; /* size of ksnd_peers */
-
- nal_cb_t *ksnd_nal_cb;
- spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */
--
- atomic_t ksnd_nthreads; /* # live threads */
- int ksnd_nthreads; /* # live threads */
-- int ksnd_shuttingdown; /* tell threads to exit */
- ksock_sched_t *ksnd_schedulers; /* scheduler state */
- int ksnd_nschedulers; /* # schedulers */
- ksock_sched_t *ksnd_schedulers; /* their state */
--
-- atomic_t ksnd_npeers; /* total # peers extant */
-- atomic_t ksnd_nclosing_conns; /* # closed conns extant */
--
-- kpr_router_t ksnd_router; /* THE router */
--
-- ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */
-- ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */
--
-- atomic_t ksnd_nactive_ltxs; /* #active ltxs */
--
-- struct list_head ksnd_deathrow_conns; /* conns to be closed */
-- struct list_head ksnd_zombie_conns; /* conns to be freed */
-- struct list_head ksnd_enomem_conns; /* conns to be retried */
-- wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */
-- unsigned long ksnd_reaper_waketime; /* when reaper will wake */
-- spinlock_t ksnd_reaper_lock; /* serialise */
--
-- int ksnd_enomem_tx; /* test ENOMEM sender */
-- int ksnd_stall_tx; /* test sluggish sender */
-- int ksnd_stall_rx; /* test sluggish receiver */
--
-- struct list_head ksnd_autoconnectd_routes; /* routes waiting to be connected */
-- wait_queue_head_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */
-- spinlock_t ksnd_autoconnectd_lock; /* serialise */
--
-- ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */
-
- int ksnd_ninterfaces;
- ksock_interface_t ksnd_interfaces[SOCKNAL_MAX_INTERFACES]; /* published interfaces */
--} ksock_nal_data_t;
--
--#define SOCKNAL_INIT_NOTHING 0
--#define SOCKNAL_INIT_DATA 1
- #define SOCKNAL_INIT_PTL 2
-#define SOCKNAL_INIT_LIB 2
--#define SOCKNAL_INIT_ALL 3
--
--/* A packet just assembled for transmission is represented by 1 or more
-- * struct iovec fragments (the first frag contains the portals header),
-- * followed by 0 or more ptl_kiov_t fragments.
-- *
-- * On the receive side, initially 1 struct iovec fragment is posted for
-- * receive (the header). Once the header has been received, the payload is
-- * received into either struct iovec or ptl_kiov_t fragments, depending on
-- * what the header matched or whether the message needs forwarding. */
--
--struct ksock_conn; /* forward ref */
--struct ksock_peer; /* forward ref */
--struct ksock_route; /* forward ref */
--
--typedef struct /* transmit packet */
--{
-- struct list_head tx_list; /* queue on conn for transmission etc */
-- char tx_isfwd; /* forwarding / sourced here */
-- int tx_nob; /* # packet bytes */
-- int tx_resid; /* residual bytes */
-- int tx_niov; /* # packet iovec frags */
-- struct iovec *tx_iov; /* packet iovec frags */
-- int tx_nkiov; /* # packet page frags */
-- ptl_kiov_t *tx_kiov; /* packet page frags */
-- struct ksock_conn *tx_conn; /* owning conn */
-- ptl_hdr_t *tx_hdr; /* packet header (for debug only) */
--#if SOCKNAL_ZC
-- zccd_t tx_zccd; /* zero copy callback descriptor */
--#endif
--} ksock_tx_t;
--
--typedef struct /* forwarded packet */
--{
-- ksock_tx_t ftx_tx; /* send info */
-- struct iovec ftx_iov; /* hdr iovec */
--} ksock_ftx_t;
--
--#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
--/* network zero copy callback descriptor embedded in ksock_tx_t */
--
--typedef struct /* locally transmitted packet */
--{
-- ksock_tx_t ltx_tx; /* send info */
-- void *ltx_private; /* lib_finalize() callback arg */
-- void *ltx_cookie; /* lib_finalize() callback arg */
-- ptl_hdr_t ltx_hdr; /* buffer for packet header */
-- int ltx_desc_size; /* bytes allocated for this desc */
-- struct iovec ltx_iov[1]; /* iov for hdr + payload */
-- ptl_kiov_t ltx_kiov[0]; /* kiov for payload */
--} ksock_ltx_t;
--
--#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
--/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
--
--#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx)
--/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
--
--/* NB list_entry() is used here as convenient macro for calculating a
-- * pointer to a struct from the address of a member. */
--
--typedef struct /* Kernel portals Socket Forwarding message buffer */
--{ /* (socknal->router) */
-- struct list_head fmb_list; /* queue idle */
-- kpr_fwd_desc_t fmb_fwd; /* router's descriptor */
-- ksock_fmb_pool_t *fmb_pool; /* owning pool */
-- struct ksock_peer *fmb_peer; /* peer received from */
-- ptl_hdr_t fmb_hdr; /* message header */
-- ptl_kiov_t fmb_kiov[0]; /* payload frags */
--} ksock_fmb_t;
--
--/* space for the rx frag descriptors; we either read a single contiguous
-- * header, or up to PTL_MD_MAX_IOV frags of payload of either type. */
--typedef union {
-- struct iovec iov[PTL_MD_MAX_IOV];
-- ptl_kiov_t kiov[PTL_MD_MAX_IOV];
--} ksock_rxiovspace_t;
--
--#define SOCKNAL_RX_HEADER 1 /* reading header */
--#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */
--#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */
--#define SOCKNAL_RX_SLOP 4 /* skipping body */
--#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */
--#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */
--
--typedef struct ksock_conn
--{
-- struct ksock_peer *ksnc_peer; /* owning peer */
-- struct ksock_route *ksnc_route; /* owning route */
-- struct list_head ksnc_list; /* stash on peer's conn list */
-- struct socket *ksnc_sock; /* actual socket */
-- void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
-- void *ksnc_saved_write_space; /* socket's original write_space() callback */
-- atomic_t ksnc_refcount; /* # users */
-- ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
- __u32 ksnc_myipaddr; /* my IP */
-- __u32 ksnc_ipaddr; /* peer's IP */
-- int ksnc_port; /* peer's port */
-- int ksnc_closing; /* being shut down */
-- int ksnc_type; /* type of connection */
-- __u64 ksnc_incarnation; /* peer's incarnation */
--
-- /* reader */
-- struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
-- unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */
-- int ksnc_rx_started; /* started receiving a message */
-- int ksnc_rx_ready; /* data ready to read */
-- int ksnc_rx_scheduled; /* being progressed */
-- int ksnc_rx_state; /* what is being read */
-- int ksnc_rx_nob_left; /* # bytes to next hdr/body */
-- int ksnc_rx_nob_wanted; /* bytes actually wanted */
-- int ksnc_rx_niov; /* # iovec frags */
-- struct iovec *ksnc_rx_iov; /* the iovec frags */
-- int ksnc_rx_nkiov; /* # page frags */
-- ptl_kiov_t *ksnc_rx_kiov; /* the page frags */
-- ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */
-- void *ksnc_cookie; /* rx lib_finalize passthru arg */
-- ptl_hdr_t ksnc_hdr; /* where I read headers into */
--
-- /* WRITER */
-- struct list_head ksnc_tx_list; /* where I enq waiting for output space */
-- struct list_head ksnc_tx_queue; /* packets waiting to be sent */
-- unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out */
- int ksnc_tx_bufnob; /* send buffer marker */
-- atomic_t ksnc_tx_nob; /* # bytes queued */
-- int ksnc_tx_ready; /* write space */
-- int ksnc_tx_scheduled; /* being progressed */
--} ksock_conn_t;
--
--#define KSNR_TYPED_ROUTES ((1 << SOCKNAL_CONN_CONTROL) | \
-- (1 << SOCKNAL_CONN_BULK_IN) | \
-- (1 << SOCKNAL_CONN_BULK_OUT))
--
--typedef struct ksock_route
--{
-- struct list_head ksnr_list; /* chain on peer route list */
-- struct list_head ksnr_connect_list; /* chain on autoconnect list */
-- struct ksock_peer *ksnr_peer; /* owning peer */
-- atomic_t ksnr_refcount; /* # users */
- int ksnr_sharecount; /* lconf usage counter */
-- unsigned long ksnr_timeout; /* when (in jiffies) reconnection can happen next */
-- unsigned int ksnr_retry_interval; /* how long between retries */
- __u32 ksnr_ipaddr; /* an IP address for this peer */
- __u32 ksnr_myipaddr; /* my IP */
- __u32 ksnr_ipaddr; /* IP address to connect to */
-- int ksnr_port; /* port to connect to */
- int ksnr_buffer_size; /* size of socket buffers */
- unsigned int ksnr_irq_affinity:1; /* set affinity? */
- unsigned int ksnr_eager:1; /* connect eagery? */
-- unsigned int ksnr_connecting:4; /* autoconnects in progress by type */
-- unsigned int ksnr_connected:4; /* connections established by type */
-- unsigned int ksnr_deleted:1; /* been removed from peer? */
- unsigned int ksnr_share_count; /* created explicitly? */
-- int ksnr_conn_count; /* # conns established by this route */
--} ksock_route_t;
--
--typedef struct ksock_peer
--{
-- struct list_head ksnp_list; /* stash on global peer list */
-- ptl_nid_t ksnp_nid; /* who's on the other end(s) */
-- atomic_t ksnp_refcount; /* # users */
- int ksnp_sharecount; /* lconf usage counter */
-- int ksnp_closing; /* being closed */
-- int ksnp_error; /* errno on closing last conn */
-- struct list_head ksnp_conns; /* all active connections */
-- struct list_head ksnp_routes; /* routes */
-- struct list_head ksnp_tx_queue; /* waiting packets */
-- unsigned long ksnp_last_alive; /* when (in jiffies) I was last alive */
- int ksnp_n_passive_ips; /* # of... */
- __u32 ksnp_passive_ips[SOCKNAL_MAX_INTERFACES]; /* preferred local interfaces */
--} ksock_peer_t;
--
--
- extern nal_cb_t ksocknal_lib;
-extern lib_nal_t ksocknal_lib;
--extern ksock_nal_data_t ksocknal_data;
-extern ksock_tunables_t ksocknal_tunables;
--
--static inline struct list_head *
- ksocknal_nid2peerlist (ptl_nid_t nid)
-ksocknal_nid2peerlist (ptl_nid_t nid)
--{
-- unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
-
-
-- return (&ksocknal_data.ksnd_peers [hash]);
--}
--
--static inline int
- ksocknal_getconnsock (ksock_conn_t *conn)
-ksocknal_getconnsock (ksock_conn_t *conn)
--{
-- int rc = -ESHUTDOWN;
-
-
-- read_lock (&ksocknal_data.ksnd_global_lock);
-- if (!conn->ksnc_closing) {
-- rc = 0;
-- get_file (conn->ksnc_sock->file);
-- }
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--
-- return (rc);
--}
--
--static inline void
--ksocknal_putconnsock (ksock_conn_t *conn)
--{
-- fput (conn->ksnc_sock->file);
--}
--
--#ifndef CONFIG_SMP
- static inline
-static inline
--int ksocknal_nsched(void)
--{
-- return 1;
--}
--#else
--#include <linux/lustre_version.h>
--# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT))
--static inline int
--ksocknal_nsched(void)
--{
-- return num_online_cpus();
--}
--
--static inline int
--ksocknal_sched2cpu(int i)
--{
-- return i;
--}
--
--static inline int
--ksocknal_irqsched2cpu(int i)
--{
-- return i;
--}
- # else
-# else
--static inline int
--ksocknal_nsched(void)
--{
-- if (smp_num_siblings == 1)
-- return (num_online_cpus());
--
-- /* We need to know if this assumption is crap */
-- LASSERT (smp_num_siblings == 2);
-- return (num_online_cpus()/2);
--}
--
--static inline int
--ksocknal_sched2cpu(int i)
--{
-- if (smp_num_siblings == 1)
-- return i;
-
-
-- return (i * 2);
--}
--
--static inline int
--ksocknal_irqsched2cpu(int i)
--{
- if (smp_num_siblings == 1)
- return ksocknal_sched2cpu(i);
-
-- return (ksocknal_sched2cpu(i) + 1);
--}
--# endif
--#endif
--
--extern void ksocknal_put_route (ksock_route_t *route);
--extern void ksocknal_put_peer (ksock_peer_t *peer);
--extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid);
--extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid);
--extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr,
-- int single, int keep_conn);
--extern int ksocknal_create_conn (ksock_route_t *route,
- struct socket *sock, int bind_irq, int type);
- struct socket *sock, int type);
--extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
--extern void ksocknal_terminate_conn (ksock_conn_t *conn);
--extern void ksocknal_destroy_conn (ksock_conn_t *conn);
--extern void ksocknal_put_conn (ksock_conn_t *conn);
--extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation);
--extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
--extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr);
--
--extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
--extern void ksocknal_tx_done (ksock_tx_t *tx, int asynch);
--extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
--extern void ksocknal_fmb_callback (void *arg, int error);
--extern void ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive);
--extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
--extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
--extern int ksocknal_scheduler (void *arg);
--extern void ksocknal_data_ready(struct sock *sk, int n);
--extern void ksocknal_write_space(struct sock *sk);
--extern int ksocknal_autoconnectd (void *arg);
--extern int ksocknal_reaper (void *arg);
-extern int ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem,
- int *rxmem, int *nagle);
--extern int ksocknal_setup_sock (struct socket *sock);
- extern int ksocknal_hello (struct socket *sock,
- ptl_nid_t *nid, int *type, __u64 *incarnation);
-extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs);
-extern int ksocknal_recv_hello (ksock_conn_t *conn,
- ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- * Author: Zach Brown <zab@zabbo.net>
-- * Author: Peter J. Braam <braam@clusterfs.com>
-- * Author: Phil Schwan <phil@clusterfs.com>
-- * Author: Eric Barton <eric@bartonsoftware.com>
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include "socknal.h"
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--# include <linux/syscalls.h>
--#endif
--
--/*
-- * LIB functions follow
-- *
-- */
- ptl_err_t
- ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr,
- user_ptr src_addr, size_t len)
- {
- CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
- }
-
- ptl_err_t
- ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr,
- void *src_addr, size_t len)
- {
- CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n",
- nal->ni.nid, (long)len, src_addr, dst_addr);
-
- memcpy( dst_addr, src_addr, len );
- return PTL_OK;
- }
-
- void *
- ksocknal_malloc(nal_cb_t *nal, size_t len)
- {
- void *buf;
-
- PORTAL_ALLOC(buf, len);
-
- if (buf != NULL)
- memset(buf, 0, len);
-
- return (buf);
- }
-
- void
- ksocknal_free(nal_cb_t *nal, void *buf, size_t len)
- {
- PORTAL_FREE(buf, len);
- }
-
- void
- ksocknal_printf(nal_cb_t *nal, const char *fmt, ...)
- {
- va_list ap;
- char msg[256];
-
- va_start (ap, fmt);
- vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */
- va_end (ap);
-
- msg[sizeof (msg) - 1] = 0; /* ensure terminated */
-
- CDEBUG (D_NET, "%s", msg);
- }
-
- void
- ksocknal_cli(nal_cb_t *nal, unsigned long *flags)
- {
- ksock_nal_data_t *data = nal->nal_data;
-
- /* OK to ignore 'flags'; we're only ever serialise threads and
- * never need to lock out interrupts */
- spin_lock(&data->ksnd_nal_cb_lock);
- }
-
- void
- ksocknal_sti(nal_cb_t *nal, unsigned long *flags)
- {
- ksock_nal_data_t *data;
- data = nal->nal_data;
-
- spin_unlock(&data->ksnd_nal_cb_lock);
- }
-
--int
- ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
-ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
--{
-- /* I would guess that if ksocknal_get_peer (nid) == NULL,
-- and we're not routing, then 'nid' is very distant :) */
- if ( nal->ni.nid == nid ) {
- if (nal->libnal_ni.ni_pid.nid == nid) {
-- *dist = 0;
-- } else {
-- *dist = 1;
-- }
--
-- return 0;
--}
--
--void
--ksocknal_free_ltx (ksock_ltx_t *ltx)
--{
-- atomic_dec(&ksocknal_data.ksnd_nactive_ltxs);
-- PORTAL_FREE(ltx, ltx->ltx_desc_size);
--}
--
--#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
--struct page *
--ksocknal_kvaddr_to_page (unsigned long vaddr)
--{
-- struct page *page;
--
-- if (vaddr >= VMALLOC_START &&
-- vaddr < VMALLOC_END)
-- page = vmalloc_to_page ((void *)vaddr);
--#if CONFIG_HIGHMEM
-- else if (vaddr >= PKMAP_BASE &&
-- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
-- page = vmalloc_to_page ((void *)vaddr);
-- /* in 2.4 ^ just walks the page tables */
--#endif
-- else
-- page = virt_to_page (vaddr);
--
-- if (page == NULL ||
-- !VALID_PAGE (page))
-- return (NULL);
--
-- return (page);
--}
--#endif
--
--int
--ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
--{
-- struct socket *sock = conn->ksnc_sock;
-- struct iovec *iov = tx->tx_iov;
-- int fragsize = iov->iov_len;
-- unsigned long vaddr = (unsigned long)iov->iov_base;
-- int more = (tx->tx_niov > 1) ||
-- (tx->tx_nkiov > 0) ||
-- (!list_empty (&conn->ksnc_tx_queue));
--#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-- int offset = vaddr & (PAGE_SIZE - 1);
-- int zcsize = MIN (fragsize, PAGE_SIZE - offset);
-- struct page *page;
--#endif
-- int rc;
--
-- /* NB we can't trust socket ops to either consume our iovs
-- * or leave them alone, so we only send 1 frag at a time. */
-- LASSERT (fragsize <= tx->tx_resid);
-- LASSERT (tx->tx_niov > 0);
--
--#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-- if (zcsize >= ksocknal_data.ksnd_zc_min_frag &&
-- (sock->sk->route_caps & NETIF_F_SG) &&
-- (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
-- (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
--
-- CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
-- (void *)vaddr, page, page_address(page), offset, zcsize);
--
-- if (fragsize > zcsize) {
-- more = 1;
-- fragsize = zcsize;
-- }
--
-- rc = tcp_sendpage_zccd(sock, page, offset, zcsize,
-- more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT,
-- &tx->tx_zccd);
-- } else
--#endif
-- {
-- /* NB don't pass tx's iov; sendmsg may or may not update it */
-- struct iovec fragiov = { .iov_base = (void *)vaddr,
-- .iov_len = fragsize};
-- struct msghdr msg = {
-- .msg_name = NULL,
-- .msg_namelen = 0,
-- .msg_iov = &fragiov,
-- .msg_iovlen = 1,
-- .msg_control = NULL,
-- .msg_controllen = 0,
-- .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT
-- };
-- mm_segment_t oldmm = get_fs();
--
-- set_fs (KERNEL_DS);
-- rc = sock_sendmsg(sock, &msg, fragsize);
-- set_fs (oldmm);
-- }
--
-- if (rc > 0) {
-- tx->tx_resid -= rc;
--
-- if (rc < iov->iov_len) {
-- /* didn't send whole iov entry... */
-- iov->iov_base = (void *)(vaddr + rc);
-- iov->iov_len -= rc;
-- } else {
-- tx->tx_iov++;
-- tx->tx_niov--;
-- }
-- }
--
-- return (rc);
--}
--
--int
--ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
--{
-- struct socket *sock = conn->ksnc_sock;
-- ptl_kiov_t *kiov = tx->tx_kiov;
-- int fragsize = kiov->kiov_len;
-- struct page *page = kiov->kiov_page;
-- int offset = kiov->kiov_offset;
-- int more = (tx->tx_nkiov > 1) ||
-- (!list_empty (&conn->ksnc_tx_queue));
-- int rc;
--
-- /* NB we can't trust socket ops to either consume our iovs
-- * or leave them alone, so we only send 1 frag at a time. */
-- LASSERT (fragsize <= tx->tx_resid);
-- LASSERT (offset + fragsize <= PAGE_SIZE);
-- LASSERT (tx->tx_niov == 0);
-- LASSERT (tx->tx_nkiov > 0);
--
--#if SOCKNAL_ZC
- if (fragsize >= ksocknal_data.ksnd_zc_min_frag &&
- if (fragsize >= ksocknal_tunables.ksnd_zc_min_frag &&
-- (sock->sk->route_caps & NETIF_F_SG) &&
-- (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) {
--
-- CDEBUG(D_NET, "page %p + offset %x for %d\n",
-- page, offset, fragsize);
--
-- rc = tcp_sendpage_zccd(sock, page, offset, fragsize,
-- more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT,
-- &tx->tx_zccd);
-- } else
--#endif
-- {
-- char *addr = ((char *)kmap (page)) + offset;
-- struct iovec fragiov = {.iov_base = addr,
-- .iov_len = fragsize};
-- struct msghdr msg = {
-- .msg_name = NULL,
-- .msg_namelen = 0,
-- .msg_iov = &fragiov,
-- .msg_iovlen = 1,
-- .msg_control = NULL,
-- .msg_controllen = 0,
-- .msg_flags = more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT
-- };
-- mm_segment_t oldmm = get_fs();
--
-- set_fs (KERNEL_DS);
-- rc = sock_sendmsg(sock, &msg, fragsize);
-- set_fs (oldmm);
--
-- kunmap (page);
-- }
--
-- if (rc > 0) {
-- tx->tx_resid -= rc;
--
-- if (rc < fragsize) {
-- kiov->kiov_offset = offset + rc;
-- kiov->kiov_len = fragsize - rc;
-- } else {
-- tx->tx_kiov++;
-- tx->tx_nkiov--;
-- }
-- }
--
-- return (rc);
--}
--
--int
--ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
--{
-- int rc;
- int bufnob;
--
-- if (ksocknal_data.ksnd_stall_tx != 0) {
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (ksocknal_data.ksnd_stall_tx * HZ);
-- }
--
-- LASSERT (tx->tx_resid != 0);
--
-- rc = ksocknal_getconnsock (conn);
-- if (rc != 0) {
-- LASSERT (conn->ksnc_closing);
-- return (-ESHUTDOWN);
-- }
--
-- do {
-- if (ksocknal_data.ksnd_enomem_tx > 0) {
-- /* testing... */
-- ksocknal_data.ksnd_enomem_tx--;
-- rc = -EAGAIN;
-- } else if (tx->tx_niov != 0) {
-- rc = ksocknal_send_iov (conn, tx);
-- } else {
-- rc = ksocknal_send_kiov (conn, tx);
- }
-
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
- if (rc > 0) /* sent something? */
- conn->ksnc_tx_bufnob += rc; /* account it */
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /* allocated send buffer bytes < computed; infer
- * something got ACKed */
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
- conn->ksnc_peer->ksnp_last_alive = jiffies;
- conn->ksnc_tx_bufnob = bufnob;
- mb();
-- }
--
-- if (rc <= 0) {
-- /* Didn't write anything.
-- *
-- * NB: rc == 0 and rc == -EAGAIN both mean try
-- * again later (linux stack returns -EAGAIN for
-- * this, but Adaptech TOE returns 0).
-- *
-- * Also, sends never fail with -ENOMEM, just
-- * -EAGAIN, but with the added bonus that we can't
-- * expect write_space() to call us back to tell us
-- * when to try sending again. We use the
-- * SOCK_NOSPACE flag to diagnose... */
--
-- LASSERT(rc != -ENOMEM);
--
-- if (rc == 0 || rc == -EAGAIN) {
-- if (test_bit(SOCK_NOSPACE,
-- &conn->ksnc_sock->flags)) {
-- rc = -EAGAIN;
-- } else {
-- static int counter;
--
-- counter++;
-- if ((counter & (-counter)) == counter)
-- CWARN("%d ENOMEM tx %p\n",
-- counter, conn);
-- rc = -ENOMEM;
-- }
-- }
-- break;
-- }
--
- /* socket's wmem_queued now includes 'rc' bytes */
- atomic_sub (rc, &conn->ksnc_tx_nob);
-- rc = 0;
-
- /* Consider the connection alive since we managed to chuck
- * more data into it. Really, we'd like to consider it
- * alive only when the peer ACKs something, but
- * write_space() only gets called back while SOCK_NOSPACE
- * is set. Instead, we presume peer death has occurred if
- * the socket doesn't drain within a timout */
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_data.ksnd_io_timeout * HZ;
- conn->ksnc_peer->ksnp_last_alive = jiffies;
--
-- } while (tx->tx_resid != 0);
--
-- ksocknal_putconnsock (conn);
-- return (rc);
--}
--
--void
--ksocknal_eager_ack (ksock_conn_t *conn)
--{
-- int opt = 1;
-- mm_segment_t oldmm = get_fs();
-- struct socket *sock = conn->ksnc_sock;
--
-- /* Remind the socket to ACK eagerly. If I don't, the socket might
-- * think I'm about to send something it could piggy-back the ACK
-- * on, introducing delay in completing zero-copy sends in my
-- * peer. */
--
-- set_fs(KERNEL_DS);
-- sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
-- (char *)&opt, sizeof (opt));
-- set_fs(oldmm);
--}
--
--int
--ksocknal_recv_iov (ksock_conn_t *conn)
--{
-- struct iovec *iov = conn->ksnc_rx_iov;
-- int fragsize = iov->iov_len;
-- unsigned long vaddr = (unsigned long)iov->iov_base;
-- struct iovec fragiov = { .iov_base = (void *)vaddr,
-- .iov_len = fragsize};
-- struct msghdr msg = {
-- .msg_name = NULL,
-- .msg_namelen = 0,
-- .msg_iov = &fragiov,
-- .msg_iovlen = 1,
-- .msg_control = NULL,
-- .msg_controllen = 0,
-- .msg_flags = 0
-- };
-- mm_segment_t oldmm = get_fs();
-- int rc;
--
-- /* NB we can't trust socket ops to either consume our iovs
-- * or leave them alone, so we only receive 1 frag at a time. */
-- LASSERT (conn->ksnc_rx_niov > 0);
-- LASSERT (fragsize <= conn->ksnc_rx_nob_wanted);
--
-- set_fs (KERNEL_DS);
-- rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT);
-- /* NB this is just a boolean............................^ */
-- set_fs (oldmm);
--
-- if (rc <= 0)
-- return (rc);
--
-- /* received something... */
-- conn->ksnc_peer->ksnp_last_alive = jiffies;
-- conn->ksnc_rx_deadline = jiffies +
- ksocknal_data.ksnd_io_timeout * HZ;
- ksocknal_tunables.ksnd_io_timeout * HZ;
-- mb(); /* order with setting rx_started */
-- conn->ksnc_rx_started = 1;
--
-- conn->ksnc_rx_nob_wanted -= rc;
-- conn->ksnc_rx_nob_left -= rc;
--
-- if (rc < fragsize) {
-- iov->iov_base = (void *)(vaddr + rc);
-- iov->iov_len = fragsize - rc;
-- return (-EAGAIN);
-- }
--
-- conn->ksnc_rx_iov++;
-- conn->ksnc_rx_niov--;
-- return (1);
--}
--
--int
--ksocknal_recv_kiov (ksock_conn_t *conn)
--{
-- ptl_kiov_t *kiov = conn->ksnc_rx_kiov;
-- struct page *page = kiov->kiov_page;
-- int offset = kiov->kiov_offset;
-- int fragsize = kiov->kiov_len;
-- unsigned long vaddr = ((unsigned long)kmap (page)) + offset;
-- struct iovec fragiov = { .iov_base = (void *)vaddr,
-- .iov_len = fragsize};
-- struct msghdr msg = {
-- .msg_name = NULL,
-- .msg_namelen = 0,
-- .msg_iov = &fragiov,
-- .msg_iovlen = 1,
-- .msg_control = NULL,
-- .msg_controllen = 0,
-- .msg_flags = 0
-- };
-- mm_segment_t oldmm = get_fs();
-- int rc;
--
-- /* NB we can't trust socket ops to either consume our iovs
-- * or leave them alone, so we only receive 1 frag at a time. */
-- LASSERT (fragsize <= conn->ksnc_rx_nob_wanted);
-- LASSERT (conn->ksnc_rx_nkiov > 0);
-- LASSERT (offset + fragsize <= PAGE_SIZE);
--
-- set_fs (KERNEL_DS);
-- rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT);
-- /* NB this is just a boolean............................^ */
-- set_fs (oldmm);
--
-- kunmap (page);
--
-- if (rc <= 0)
-- return (rc);
--
-- /* received something... */
-- conn->ksnc_peer->ksnp_last_alive = jiffies;
-- conn->ksnc_rx_deadline = jiffies +
- ksocknal_data.ksnd_io_timeout * HZ;
- ksocknal_tunables.ksnd_io_timeout * HZ;
-- mb(); /* order with setting rx_started */
-- conn->ksnc_rx_started = 1;
--
-- conn->ksnc_rx_nob_wanted -= rc;
-- conn->ksnc_rx_nob_left -= rc;
--
-- if (rc < fragsize) {
-- kiov->kiov_offset = offset + rc;
-- kiov->kiov_len = fragsize - rc;
-- return (-EAGAIN);
-- }
--
-- conn->ksnc_rx_kiov++;
-- conn->ksnc_rx_nkiov--;
-- return (1);
--}
--
--int
--ksocknal_receive (ksock_conn_t *conn)
--{
-- /* Return 1 on success, 0 on EOF, < 0 on error.
-- * Caller checks ksnc_rx_nob_wanted to determine
-- * progress/completion. */
-- int rc;
-- ENTRY;
--
-- if (ksocknal_data.ksnd_stall_rx != 0) {
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (ksocknal_data.ksnd_stall_rx * HZ);
-- }
--
-- rc = ksocknal_getconnsock (conn);
-- if (rc != 0) {
-- LASSERT (conn->ksnc_closing);
-- return (-ESHUTDOWN);
-- }
--
-- for (;;) {
-- if (conn->ksnc_rx_niov != 0)
-- rc = ksocknal_recv_iov (conn);
-- else
-- rc = ksocknal_recv_kiov (conn);
--
-- if (rc <= 0) {
-- /* error/EOF or partial receive */
-- if (rc == -EAGAIN) {
-- rc = 1;
-- } else if (rc == 0 && conn->ksnc_rx_started) {
-- /* EOF in the middle of a message */
-- rc = -EPROTO;
-- }
-- break;
-- }
--
-- /* Completed a fragment */
--
-- if (conn->ksnc_rx_nob_wanted == 0) {
-- /* Completed a message segment (header or payload) */
- if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 &&
- if ((ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 &&
-- (conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
-- conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) {
-- /* Remind the socket to ack eagerly... */
-- ksocknal_eager_ack(conn);
-- }
-- rc = 1;
-- break;
-- }
-- }
--
-- ksocknal_putconnsock (conn);
-- RETURN (rc);
--}
--
--#if SOCKNAL_ZC
--void
--ksocknal_zc_callback (zccd_t *zcd)
--{
-- ksock_tx_t *tx = KSOCK_ZCCD_2_TX(zcd);
-- ksock_sched_t *sched = tx->tx_conn->ksnc_scheduler;
-- unsigned long flags;
-- ENTRY;
--
-- /* Schedule tx for cleanup (can't do it now due to lock conflicts) */
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list);
-- wake_up (&sched->kss_waitq);
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
-- EXIT;
--}
--#endif
--
--void
--ksocknal_tx_done (ksock_tx_t *tx, int asynch)
--{
-- ksock_ltx_t *ltx;
-- ENTRY;
--
-- if (tx->tx_conn != NULL) {
- /* This tx got queued on a conn; do the accounting... */
- atomic_sub (tx->tx_nob, &tx->tx_conn->ksnc_tx_nob);
--#if SOCKNAL_ZC
-- /* zero copy completion isn't always from
-- * process_transmit() so it needs to keep a ref on
-- * tx_conn... */
-- if (asynch)
-- ksocknal_put_conn (tx->tx_conn);
--#else
-- LASSERT (!asynch);
--#endif
-- }
--
-- if (tx->tx_isfwd) { /* was a forwarded packet? */
-- kpr_fwd_done (&ksocknal_data.ksnd_router,
-- KSOCK_TX_2_KPR_FWD_DESC (tx),
-- (tx->tx_resid == 0) ? 0 : -ECONNABORTED);
-- EXIT;
-- return;
-- }
--
-- /* local send */
-- ltx = KSOCK_TX_2_KSOCK_LTX (tx);
--
-- lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie,
-- (tx->tx_resid == 0) ? PTL_OK : PTL_FAIL);
--
-- ksocknal_free_ltx (ltx);
-- EXIT;
--}
--
--void
--ksocknal_tx_launched (ksock_tx_t *tx)
--{
--#if SOCKNAL_ZC
-- if (atomic_read (&tx->tx_zccd.zccd_count) != 1) {
-- ksock_conn_t *conn = tx->tx_conn;
--
-- /* zccd skbufs are still in-flight. First take a ref on
-- * conn, so it hangs about for ksocknal_tx_done... */
-- atomic_inc (&conn->ksnc_refcount);
--
-- /* ...then drop the initial ref on zccd, so the zero copy
-- * callback can occur */
-- zccd_put (&tx->tx_zccd);
-- return;
-- }
--#endif
-- /* Any zero-copy-ness (if any) has completed; I can complete the
-- * transmit now, avoiding an extra schedule */
-- ksocknal_tx_done (tx, 0);
--}
--
--int
--ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
--{
-- unsigned long flags;
-- int rc;
--
-- rc = ksocknal_transmit (conn, tx);
--
-- CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
--
-- if (tx->tx_resid == 0) {
-- /* Sent everything OK */
-- LASSERT (rc == 0);
--
-- ksocknal_tx_launched (tx);
-- return (0);
-- }
--
-- if (rc == -EAGAIN)
-- return (rc);
--
-- if (rc == -ENOMEM) {
-- /* Queue on ksnd_enomem_conns for retry after a timeout */
-- spin_lock_irqsave(&ksocknal_data.ksnd_reaper_lock, flags);
--
-- /* enomem list takes over scheduler's ref... */
-- LASSERT (conn->ksnc_tx_scheduled);
-- list_add_tail(&conn->ksnc_tx_list,
-- &ksocknal_data.ksnd_enomem_conns);
-- if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY,
-- ksocknal_data.ksnd_reaper_waketime))
-- wake_up (&ksocknal_data.ksnd_reaper_waitq);
--
-- spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags);
-- return (rc);
-- }
--
-- /* Actual error */
-- LASSERT (rc < 0);
--
-- if (!conn->ksnc_closing)
-- CERROR("[%p] Error %d on write to "LPX64
-- " ip %d.%d.%d.%d:%d\n", conn, rc,
-- conn->ksnc_peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr),
-- conn->ksnc_port);
--
-- ksocknal_close_conn_and_siblings (conn, rc);
-- ksocknal_tx_launched (tx);
--
-- return (rc);
--}
--
--void
--ksocknal_launch_autoconnect_locked (ksock_route_t *route)
--{
-- unsigned long flags;
--
-- /* called holding write lock on ksnd_global_lock */
--
-- LASSERT (!route->ksnr_deleted);
-- LASSERT ((route->ksnr_connected & (1 << SOCKNAL_CONN_ANY)) == 0);
-- LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES);
- LASSERT (!route->ksnr_connecting);
- LASSERT (route->ksnr_connecting == 0);
--
- if (ksocknal_data.ksnd_typed_conns)
- if (ksocknal_tunables.ksnd_typed_conns)
-- route->ksnr_connecting =
-- KSNR_TYPED_ROUTES & ~route->ksnr_connected;
-- else
-- route->ksnr_connecting = (1 << SOCKNAL_CONN_ANY);
--
-- atomic_inc (&route->ksnr_refcount); /* extra ref for asynchd */
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
--
-- list_add_tail (&route->ksnr_connect_list,
-- &ksocknal_data.ksnd_autoconnectd_routes);
-- wake_up (&ksocknal_data.ksnd_autoconnectd_waitq);
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
--}
--
--ksock_peer_t *
--ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid)
--{
-- char ipbuf[PTL_NALFMT_SIZE];
-- ptl_nid_t target_nid;
-- int rc;
-- ksock_peer_t *peer = ksocknal_find_peer_locked (nid);
--
-- if (peer != NULL)
-- return (peer);
--
-- if (tx->tx_isfwd) {
-- CERROR ("Can't send packet to "LPX64
-- " %s: routed target is not a peer\n",
-- nid, portals_nid2str(SOCKNAL, nid, ipbuf));
-- return (NULL);
-- }
--
-- rc = kpr_lookup (&ksocknal_data.ksnd_router, nid, tx->tx_nob,
-- &target_nid);
-- if (rc != 0) {
-- CERROR ("Can't route to "LPX64" %s: router error %d\n",
-- nid, portals_nid2str(SOCKNAL, nid, ipbuf), rc);
-- return (NULL);
-- }
--
-- peer = ksocknal_find_peer_locked (target_nid);
-- if (peer != NULL)
-- return (peer);
--
-- CERROR ("Can't send packet to "LPX64" %s: no peer entry\n",
-- target_nid, portals_nid2str(SOCKNAL, target_nid, ipbuf));
-- return (NULL);
--}
--
--ksock_conn_t *
--ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer)
--{
-- struct list_head *tmp;
-- ksock_conn_t *typed = NULL;
-- int tnob = 0;
-- ksock_conn_t *fallback = NULL;
-- int fnob = 0;
- ksock_conn_t *conn;
--
- /* Find the conn with the shortest tx queue */
-- list_for_each (tmp, &peer->ksnp_conns) {
-- ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list);
-#if SOCKNAL_ROUND_ROBIN
- const int nob = 0;
-#else
-- int nob = atomic_read(&c->ksnc_tx_nob) +
-- c->ksnc_sock->sk->sk_wmem_queued;
-
-#endif
-- LASSERT (!c->ksnc_closing);
--
-- if (fallback == NULL || nob < fnob) {
-- fallback = c;
-- fnob = nob;
-- }
--
- if (!ksocknal_data.ksnd_typed_conns)
- if (!ksocknal_tunables.ksnd_typed_conns)
-- continue;
--
-- switch (c->ksnc_type) {
-- default:
-- LBUG();
-- case SOCKNAL_CONN_ANY:
-- break;
-- case SOCKNAL_CONN_BULK_IN:
-- continue;
-- case SOCKNAL_CONN_BULK_OUT:
- if (tx->tx_nob < ksocknal_data.ksnd_min_bulk)
- if (tx->tx_nob < ksocknal_tunables.ksnd_min_bulk)
-- continue;
-- break;
-- case SOCKNAL_CONN_CONTROL:
- if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk)
- if (tx->tx_nob >= ksocknal_tunables.ksnd_min_bulk)
-- continue;
-- break;
-- }
--
-- if (typed == NULL || nob < tnob) {
-- typed = c;
-- tnob = nob;
-- }
-- }
--
-- /* prefer the typed selection */
- return ((typed != NULL) ? typed : fallback);
- conn = (typed != NULL) ? typed : fallback;
-
-#if SOCKNAL_ROUND_ROBIN
- if (conn != NULL) {
- /* round-robin all else being equal */
- list_del (&conn->ksnc_list);
- list_add_tail (&conn->ksnc_list, &peer->ksnp_conns);
- }
-#endif
- return conn;
--}
--
--void
--ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
--{
-- unsigned long flags;
-- ksock_sched_t *sched = conn->ksnc_scheduler;
--
-- /* called holding global lock (read or irq-write) and caller may
-- * not have dropped this lock between finding conn and calling me,
-- * so we don't need the {get,put}connsock dance to deref
-- * ksnc_sock... */
-- LASSERT(!conn->ksnc_closing);
-- LASSERT(tx->tx_resid == tx->tx_nob);
--
-- CDEBUG (D_NET, "Sending to "LPX64" ip %d.%d.%d.%d:%d\n",
-- conn->ksnc_peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr),
-- conn->ksnc_port);
--
-- atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
-- tx->tx_conn = conn;
--
--#if SOCKNAL_ZC
-- zccd_init (&tx->tx_zccd, ksocknal_zc_callback);
-- /* NB this sets 1 ref on zccd, so the callback can only occur after
-- * I've released this ref. */
--#endif
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_data.ksnd_io_timeout * HZ;
- mb(); /* order with list_add_tail */
- if (list_empty(&conn->ksnc_tx_queue) &&
- conn->ksnc_sock->sk->sk_wmem_queued == 0) {
- /* First packet starts the timeout */
- conn->ksnc_tx_deadline = jiffies +
- ksocknal_tunables.ksnd_io_timeout * HZ;
- conn->ksnc_tx_bufnob = 0;
- mb(); /* order with adding to tx_queue */
- }
--
-- list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
--
-- if (conn->ksnc_tx_ready && /* able to send */
-- !conn->ksnc_tx_scheduled) { /* not scheduled to send */
-- /* +1 ref for scheduler */
-- atomic_inc (&conn->ksnc_refcount);
-- list_add_tail (&conn->ksnc_tx_list,
-- &sched->kss_tx_conns);
-- conn->ksnc_tx_scheduled = 1;
-- wake_up (&sched->kss_waitq);
-- }
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
--}
--
--ksock_route_t *
--ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
--{
-- struct list_head *tmp;
-- ksock_route_t *route;
- ksock_route_t *candidate = NULL;
- int found = 0;
-- int bits;
--
-- list_for_each (tmp, &peer->ksnp_routes) {
-- route = list_entry (tmp, ksock_route_t, ksnr_list);
-- bits = route->ksnr_connected;
-
- if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES ||
- (bits & (1 << SOCKNAL_CONN_ANY)) != 0 ||
- route->ksnr_connecting != 0) {
- /* All typed connections have been established, or
- * an untyped connection has been established, or
- * connections are currently being established */
- found = 1;
-
- /* All typed connections established? */
- if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES)
-- continue;
- }
-
- /* Untyped connection established? */
- if ((bits & (1 << SOCKNAL_CONN_ANY)) != 0)
- continue;
-
- /* connection being established? */
- if (route->ksnr_connecting != 0)
- continue;
--
-- /* too soon to retry this guy? */
-- if (!time_after_eq (jiffies, route->ksnr_timeout))
-- continue;
--
- /* always do eager routes */
- if (route->ksnr_eager)
- return (route);
-
- if (candidate == NULL) {
- /* If we don't find any other route that is fully
- * connected or connecting, the first connectable
- * route is returned. If it fails to connect, it
- * will get placed at the end of the list */
- candidate = route;
- }
- return (route);
-- }
-
- return (found ? NULL : candidate);
-
- return (NULL);
--}
--
--ksock_route_t *
--ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
--{
-- struct list_head *tmp;
-- ksock_route_t *route;
--
-- list_for_each (tmp, &peer->ksnp_routes) {
-- route = list_entry (tmp, ksock_route_t, ksnr_list);
--
-- if (route->ksnr_connecting != 0)
-- return (route);
-- }
--
-- return (NULL);
--}
--
--int
--ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
--{
-- unsigned long flags;
-- ksock_peer_t *peer;
-- ksock_conn_t *conn;
-- ksock_route_t *route;
-- rwlock_t *g_lock;
--
-- /* Ensure the frags we've been given EXACTLY match the number of
-- * bytes we want to send. Many TCP/IP stacks disregard any total
-- * size parameters passed to them and just look at the frags.
-- *
-- * We always expect at least 1 mapped fragment containing the
-- * complete portals header. */
-- LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) +
-- lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
-- LASSERT (tx->tx_niov >= 1);
-- LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t));
--
-- CDEBUG (D_NET, "packet %p type %d, nob %d niov %d nkiov %d\n",
-- tx, ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type,
-- tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
--
-- tx->tx_conn = NULL; /* only set when assigned a conn */
-- tx->tx_resid = tx->tx_nob;
-- tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base;
--
-- g_lock = &ksocknal_data.ksnd_global_lock;
-#if !SOCKNAL_ROUND_ROBIN
-- read_lock (g_lock);
-
-
-- peer = ksocknal_find_target_peer_locked (tx, nid);
-- if (peer == NULL) {
-- read_unlock (g_lock);
-- return (-EHOSTUNREACH);
-- }
--
-- if (ksocknal_find_connectable_route_locked(peer) == NULL) {
-- conn = ksocknal_find_conn_locked (tx, peer);
-- if (conn != NULL) {
-- /* I've got no autoconnect routes that need to be
-- * connecting and I do have an actual connection... */
-- ksocknal_queue_tx_locked (tx, conn);
-- read_unlock (g_lock);
-- return (0);
-- }
-- }
-
- /* Making one or more connections; I'll need a write lock... */
-
- atomic_inc (&peer->ksnp_refcount); /* +1 ref for me while I unlock */
-
- /* I'll need a write lock... */
-- read_unlock (g_lock);
- write_lock_irqsave (g_lock, flags);
-
- if (peer->ksnp_closing) { /* peer deleted as I blocked! */
- write_unlock_irqrestore (g_lock, flags);
- ksocknal_put_peer (peer);
-#endif
- write_lock_irqsave(g_lock, flags);
-
- peer = ksocknal_find_target_peer_locked (tx, nid);
- if (peer == NULL) {
- write_unlock_irqrestore(g_lock, flags);
-- return (-EHOSTUNREACH);
-- }
- ksocknal_put_peer (peer); /* drop ref I got above */
--
-- for (;;) {
-- /* launch any/all autoconnections that need it */
-- route = ksocknal_find_connectable_route_locked (peer);
-- if (route == NULL)
-- break;
--
-- ksocknal_launch_autoconnect_locked (route);
-- }
--
-- conn = ksocknal_find_conn_locked (tx, peer);
-- if (conn != NULL) {
-- /* Connection exists; queue message on it */
-- ksocknal_queue_tx_locked (tx, conn);
-- write_unlock_irqrestore (g_lock, flags);
-- return (0);
-- }
--
-- route = ksocknal_find_connecting_route_locked (peer);
-- if (route != NULL) {
-- /* At least 1 connection is being established; queue the
-- * message... */
-- list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
-- write_unlock_irqrestore (g_lock, flags);
-- return (0);
-- }
--
-- write_unlock_irqrestore (g_lock, flags);
-- return (-EHOSTUNREACH);
--}
--
--ptl_err_t
- ksocknal_sendmsg(nal_cb_t *nal,
-ksocknal_sendmsg(lib_nal_t *nal,
-- void *private,
-- lib_msg_t *cookie,
-- ptl_hdr_t *hdr,
-- int type,
-- ptl_nid_t nid,
-- ptl_pid_t pid,
-- unsigned int payload_niov,
-- struct iovec *payload_iov,
-- ptl_kiov_t *payload_kiov,
-- size_t payload_offset,
-- size_t payload_nob)
--{
-- ksock_ltx_t *ltx;
-- int desc_size;
-- int rc;
--
-- /* NB 'private' is different depending on what we're sending.
-- * Just ignore it... */
--
-- CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
-- " pid %d\n", payload_nob, payload_niov, nid , pid);
--
-- LASSERT (payload_nob == 0 || payload_niov > 0);
-- LASSERT (payload_niov <= PTL_MD_MAX_IOV);
--
-- /* It must be OK to kmap() if required */
-- LASSERT (payload_kiov == NULL || !in_interrupt ());
-- /* payload is either all vaddrs or all pages */
-- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
--
-- if (payload_iov != NULL)
-- desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
-- else
-- desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
--
-- if (in_interrupt() ||
-- type == PTL_MSG_ACK ||
-- type == PTL_MSG_REPLY) {
-- /* Can't block if in interrupt or responding to an incoming
-- * message */
-- PORTAL_ALLOC_ATOMIC(ltx, desc_size);
-- } else {
-- PORTAL_ALLOC(ltx, desc_size);
-- }
--
-- if (ltx == NULL) {
-- CERROR("Can't allocate tx desc type %d size %d %s\n",
-- type, desc_size, in_interrupt() ? "(intr)" : "");
- return (PTL_NOSPACE);
- return (PTL_NO_SPACE);
-- }
--
-- atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
--
-- ltx->ltx_desc_size = desc_size;
--
-- /* We always have 1 mapped frag for the header */
-- ltx->ltx_tx.tx_iov = ltx->ltx_iov;
-- ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
-- ltx->ltx_iov[0].iov_len = sizeof(*hdr);
-- ltx->ltx_hdr = *hdr;
--
-- ltx->ltx_private = private;
-- ltx->ltx_cookie = cookie;
--
-- ltx->ltx_tx.tx_isfwd = 0;
-- ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
--
-- if (payload_iov != NULL) {
-- /* payload is all mapped */
-- ltx->ltx_tx.tx_kiov = NULL;
-- ltx->ltx_tx.tx_nkiov = 0;
--
-- ltx->ltx_tx.tx_niov =
-- 1 + lib_extract_iov(payload_niov, &ltx->ltx_iov[1],
-- payload_niov, payload_iov,
-- payload_offset, payload_nob);
-- } else {
-- /* payload is all pages */
-- ltx->ltx_tx.tx_niov = 1;
--
-- ltx->ltx_tx.tx_kiov = ltx->ltx_kiov;
-- ltx->ltx_tx.tx_nkiov =
-- lib_extract_kiov(payload_niov, ltx->ltx_kiov,
-- payload_niov, payload_kiov,
-- payload_offset, payload_nob);
-- }
--
-- rc = ksocknal_launch_packet(&ltx->ltx_tx, nid);
-- if (rc == 0)
-- return (PTL_OK);
--
-- ksocknal_free_ltx(ltx);
-- return (PTL_FAIL);
--}
--
--ptl_err_t
- ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie,
-ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-- unsigned int payload_niov, struct iovec *payload_iov,
-- size_t payload_offset, size_t payload_len)
--{
-- return (ksocknal_sendmsg(nal, private, cookie,
-- hdr, type, nid, pid,
-- payload_niov, payload_iov, NULL,
-- payload_offset, payload_len));
--}
--
--ptl_err_t
- ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie,
-ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-- unsigned int payload_niov, ptl_kiov_t *payload_kiov,
-- size_t payload_offset, size_t payload_len)
--{
-- return (ksocknal_sendmsg(nal, private, cookie,
-- hdr, type, nid, pid,
-- payload_niov, NULL, payload_kiov,
-- payload_offset, payload_len));
--}
--
--void
--ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
--{
-- ptl_nid_t nid = fwd->kprfd_gateway_nid;
-- ksock_ftx_t *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
-- int rc;
--
-- CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd,
-- fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
--
-- /* I'm the gateway; must be the last hop */
- if (nid == ksocknal_lib.ni.nid)
- if (nid == ksocknal_lib.libnal_ni.ni_pid.nid)
-- nid = fwd->kprfd_target_nid;
--
-- /* setup iov for hdr */
-- ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
-- ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t);
--
-- ftx->ftx_tx.tx_isfwd = 1; /* This is a forwarding packet */
-- ftx->ftx_tx.tx_nob = sizeof(ptl_hdr_t) + fwd->kprfd_nob;
-- ftx->ftx_tx.tx_niov = 1;
-- ftx->ftx_tx.tx_iov = &ftx->ftx_iov;
-- ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
-- ftx->ftx_tx.tx_kiov = fwd->kprfd_kiov;
--
-- rc = ksocknal_launch_packet (&ftx->ftx_tx, nid);
-- if (rc != 0)
-- kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc);
--}
--
--int
--ksocknal_thread_start (int (*fn)(void *arg), void *arg)
--{
- long pid = kernel_thread (fn, arg, 0);
- long pid = kernel_thread (fn, arg, 0);
- unsigned long flags;
--
-- if (pid < 0)
-- return ((int)pid);
--
- atomic_inc (&ksocknal_data.ksnd_nthreads);
- write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
- ksocknal_data.ksnd_nthreads++;
- write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
-- return (0);
--}
--
--void
--ksocknal_thread_fini (void)
--{
- atomic_dec (&ksocknal_data.ksnd_nthreads);
- unsigned long flags;
-
- write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
- ksocknal_data.ksnd_nthreads--;
- write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
--}
--
--void
--ksocknal_fmb_callback (void *arg, int error)
--{
-- ksock_fmb_t *fmb = (ksock_fmb_t *)arg;
-- ksock_fmb_pool_t *fmp = fmb->fmb_pool;
-- ptl_hdr_t *hdr = &fmb->fmb_hdr;
-- ksock_conn_t *conn = NULL;
-- ksock_sched_t *sched;
-- unsigned long flags;
-- char ipbuf[PTL_NALFMT_SIZE];
-- char ipbuf2[PTL_NALFMT_SIZE];
--
-- if (error != 0)
-- CERROR("Failed to route packet from "
-- LPX64" %s to "LPX64" %s: %d\n",
- NTOH__u64(hdr->src_nid),
- portals_nid2str(SOCKNAL, NTOH__u64(hdr->src_nid), ipbuf),
- NTOH__u64(hdr->dest_nid),
- portals_nid2str(SOCKNAL, NTOH__u64(hdr->dest_nid), ipbuf2),
- le64_to_cpu(hdr->src_nid),
- portals_nid2str(SOCKNAL, le64_to_cpu(hdr->src_nid), ipbuf),
- le64_to_cpu(hdr->dest_nid),
- portals_nid2str(SOCKNAL, le64_to_cpu(hdr->dest_nid), ipbuf2),
-- error);
-- else
-- CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n",
- NTOH__u64 (hdr->src_nid), NTOH__u64 (hdr->dest_nid));
- le64_to_cpu(hdr->src_nid), le64_to_cpu(hdr->dest_nid));
--
-- /* drop peer ref taken on init */
-- ksocknal_put_peer (fmb->fmb_peer);
--
-- spin_lock_irqsave (&fmp->fmp_lock, flags);
--
-- list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
-- fmp->fmp_nactive_fmbs--;
--
-- if (!list_empty (&fmp->fmp_blocked_conns)) {
-- conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next,
-- ksock_conn_t, ksnc_rx_list);
-- list_del (&conn->ksnc_rx_list);
-- }
--
-- spin_unlock_irqrestore (&fmp->fmp_lock, flags);
--
-- if (conn == NULL)
-- return;
--
-- CDEBUG (D_NET, "Scheduling conn %p\n", conn);
-- LASSERT (conn->ksnc_rx_scheduled);
-- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
--
-- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
--
-- sched = conn->ksnc_scheduler;
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
-- wake_up (&sched->kss_waitq);
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
--}
--
--ksock_fmb_t *
--ksocknal_get_idle_fmb (ksock_conn_t *conn)
--{
-- int payload_nob = conn->ksnc_rx_nob_left;
-- unsigned long flags;
-- ksock_fmb_pool_t *pool;
-- ksock_fmb_t *fmb;
--
-- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-- LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
--
-- if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
-- pool = &ksocknal_data.ksnd_small_fmp;
-- else
-- pool = &ksocknal_data.ksnd_large_fmp;
--
-- spin_lock_irqsave (&pool->fmp_lock, flags);
--
-- if (!list_empty (&pool->fmp_idle_fmbs)) {
-- fmb = list_entry(pool->fmp_idle_fmbs.next,
-- ksock_fmb_t, fmb_list);
-- list_del (&fmb->fmb_list);
-- pool->fmp_nactive_fmbs++;
-- spin_unlock_irqrestore (&pool->fmp_lock, flags);
--
-- return (fmb);
-- }
--
-- /* deschedule until fmb free */
--
-- conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
--
-- list_add_tail (&conn->ksnc_rx_list,
-- &pool->fmp_blocked_conns);
--
-- spin_unlock_irqrestore (&pool->fmp_lock, flags);
-- return (NULL);
--}
--
--int
--ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
--{
-- int payload_nob = conn->ksnc_rx_nob_left;
- ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
- ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid);
-- int niov = 0;
-- int nob = payload_nob;
--
-- LASSERT (conn->ksnc_rx_scheduled);
-- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-- LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
-- LASSERT (payload_nob >= 0);
-- LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
-- LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-- LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
--
-- /* Take a ref on the conn's peer to prevent module unload before
-- * forwarding completes. */
-- fmb->fmb_peer = conn->ksnc_peer;
-- atomic_inc (&conn->ksnc_peer->ksnp_refcount);
--
-- /* Copy the header we just read into the forwarding buffer. If
-- * there's payload, start reading reading it into the buffer,
-- * otherwise the forwarding buffer can be kicked off
-- * immediately. */
-- fmb->fmb_hdr = conn->ksnc_hdr;
--
-- while (nob > 0) {
-- LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
-- LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
-- fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
-- nob -= PAGE_SIZE;
-- niov++;
-- }
--
-- kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr,
-- payload_nob, niov, fmb->fmb_kiov,
-- ksocknal_fmb_callback, fmb);
--
-- if (payload_nob == 0) { /* got complete packet already */
-- CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n",
- conn, NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid);
- conn, le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid);
--
-- kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
--
-- ksocknal_new_packet (conn, 0); /* on to next packet */
-- return (1);
-- }
--
-- conn->ksnc_cookie = fmb; /* stash fmb for later */
-- conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
--
-- /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
-- * buffer */
-- LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
--
-- conn->ksnc_rx_niov = 0;
-- conn->ksnc_rx_nkiov = niov;
-- conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
-- memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
--
-- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
- NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
- le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
-- return (0);
--}
--
--void
--ksocknal_fwd_parse (ksock_conn_t *conn)
--{
-- ksock_peer_t *peer;
- ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid);
- ptl_nid_t src_nid = NTOH__u64 (conn->ksnc_hdr.src_nid);
- int body_len = NTOH__u32 (conn->ksnc_hdr.payload_length);
- ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid);
- ptl_nid_t src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid);
- int body_len = le32_to_cpu(conn->ksnc_hdr.payload_length);
-- char str[PTL_NALFMT_SIZE];
-- char str2[PTL_NALFMT_SIZE];
--
-- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
-- src_nid, dest_nid, conn->ksnc_rx_nob_left);
--
-- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
-- LASSERT (conn->ksnc_rx_scheduled);
--
-- if (body_len < 0) { /* length corrupt (overflow) */
-- CERROR("dropping packet from "LPX64" (%s) for "LPX64" (%s): "
-- "packet size %d illegal\n",
-- src_nid, portals_nid2str(TCPNAL, src_nid, str),
-- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2),
-- body_len);
--
-- ksocknal_new_packet (conn, 0); /* on to new packet */
-- return;
-- }
--
-- if (!kpr_routing(&ksocknal_data.ksnd_router)) { /* not forwarding */
-- CERROR("dropping packet from "LPX64" (%s) for "LPX64
-- " (%s): not forwarding\n",
-- src_nid, portals_nid2str(TCPNAL, src_nid, str),
-- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2));
-- /* on to new packet (skip this one's body) */
-- ksocknal_new_packet (conn, body_len);
-- return;
-- }
--
-- if (body_len > PTL_MTU) { /* too big to forward */
-- CERROR ("dropping packet from "LPX64" (%s) for "LPX64
-- "(%s): packet size %d too big\n",
-- src_nid, portals_nid2str(TCPNAL, src_nid, str),
-- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2),
-- body_len);
-- /* on to new packet (skip this one's body) */
-- ksocknal_new_packet (conn, body_len);
-- return;
-- }
--
-- /* should have gone direct */
-- peer = ksocknal_get_peer (conn->ksnc_hdr.dest_nid);
-- if (peer != NULL) {
-- CERROR ("dropping packet from "LPX64" (%s) for "LPX64
-- "(%s): target is a peer\n",
-- src_nid, portals_nid2str(TCPNAL, src_nid, str),
-- dest_nid, portals_nid2str(TCPNAL, dest_nid, str2));
-- ksocknal_put_peer (peer); /* drop ref from get above */
--
-- /* on to next packet (skip this one's body) */
-- ksocknal_new_packet (conn, body_len);
-- return;
-- }
--
-- conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */
-- conn->ksnc_rx_nob_left = body_len; /* stash packet size */
-- conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */
--}
--
--int
--ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
--{
-- static char ksocknal_slop_buffer[4096];
--
-- int nob;
-- int niov;
-- int skipped;
--
-- if (nob_to_skip == 0) { /* right at next packet boundary now */
-- conn->ksnc_rx_started = 0;
-- mb (); /* racing with timeout thread */
--
-- conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
-- conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
-- conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
--
-- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
-- conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
-- conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t);
-- conn->ksnc_rx_niov = 1;
--
-- conn->ksnc_rx_kiov = NULL;
-- conn->ksnc_rx_nkiov = 0;
-- return (1);
-- }
--
-- /* Set up to skip as much a possible now. If there's more left
-- * (ran out of iov entries) we'll get called again */
--
-- conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
-- conn->ksnc_rx_nob_left = nob_to_skip;
-- conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
-- skipped = 0;
-- niov = 0;
--
-- do {
-- nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
--
-- conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
-- conn->ksnc_rx_iov[niov].iov_len = nob;
-- niov++;
-- skipped += nob;
-- nob_to_skip -=nob;
--
-- } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
-- niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
--
-- conn->ksnc_rx_niov = niov;
-- conn->ksnc_rx_kiov = NULL;
-- conn->ksnc_rx_nkiov = 0;
-- conn->ksnc_rx_nob_wanted = skipped;
-- return (0);
--}
--
--int
--ksocknal_process_receive (ksock_conn_t *conn)
--{
-- ksock_fmb_t *fmb;
-- int rc;
--
-- LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
--
-- /* doesn't need a forwarding buffer */
-- if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB)
-- goto try_read;
--
-- get_fmb:
-- fmb = ksocknal_get_idle_fmb (conn);
-- if (fmb == NULL) {
-- /* conn descheduled waiting for idle fmb */
-- return (0);
-- }
--
-- if (ksocknal_init_fmb (conn, fmb)) {
-- /* packet forwarded */
-- return (0);
-- }
--
-- try_read:
-- /* NB: sched lock NOT held */
-- LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
-- conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
-- conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
-- conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
--
-- LASSERT (conn->ksnc_rx_nob_wanted > 0);
--
-- rc = ksocknal_receive(conn);
--
-- if (rc <= 0) {
-- LASSERT (rc != -EAGAIN);
--
-- if (rc == 0)
-- CWARN ("[%p] EOF from "LPX64" ip %d.%d.%d.%d:%d\n",
-- conn, conn->ksnc_peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr),
-- conn->ksnc_port);
-- else if (!conn->ksnc_closing)
-- CERROR ("[%p] Error %d on read from "LPX64
-- " ip %d.%d.%d.%d:%d\n",
-- conn, rc, conn->ksnc_peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr),
-- conn->ksnc_port);
--
-- ksocknal_close_conn_and_siblings (conn, rc);
-- return (rc == 0 ? -ESHUTDOWN : rc);
-- }
--
-- if (conn->ksnc_rx_nob_wanted != 0) {
-- /* short read */
-- return (-EAGAIN);
-- }
--
-- switch (conn->ksnc_rx_state) {
-- case SOCKNAL_RX_HEADER:
- if (conn->ksnc_hdr.type != HTON__u32(PTL_MSG_HELLO) &&
- NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) {
- if (conn->ksnc_hdr.type != cpu_to_le32(PTL_MSG_HELLO) &&
- le64_to_cpu(conn->ksnc_hdr.dest_nid) !=
- ksocknal_lib.libnal_ni.ni_pid.nid) {
-- /* This packet isn't for me */
-- ksocknal_fwd_parse (conn);
-- switch (conn->ksnc_rx_state) {
-- case SOCKNAL_RX_HEADER: /* skipped (zero payload) */
-- return (0); /* => come back later */
-- case SOCKNAL_RX_SLOP: /* skipping packet's body */
-- goto try_read; /* => go read it */
-- case SOCKNAL_RX_GET_FMB: /* forwarding */
-- goto get_fmb; /* => go get a fwd msg buffer */
-- default:
-- LBUG ();
-- }
-- /* Not Reached */
-- }
--
-- /* sets wanted_len, iovs etc */
- lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
- rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
-
- if (rc != PTL_OK) {
- /* I just received garbage: give up on this conn */
- ksocknal_close_conn_and_siblings (conn, rc);
- return (-EPROTO);
- }
--
-- if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
-- conn->ksnc_rx_state = SOCKNAL_RX_BODY;
-- goto try_read; /* go read the payload */
-- }
-- /* Fall through (completed packet for me) */
--
-- case SOCKNAL_RX_BODY:
-- /* payload all received */
-- lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_OK);
-- /* Fall through */
--
-- case SOCKNAL_RX_SLOP:
-- /* starting new packet? */
-- if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
-- return (0); /* come back later */
-- goto try_read; /* try to finish reading slop now */
--
-- case SOCKNAL_RX_BODY_FWD:
-- /* payload all received */
-- CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n",
- conn, NTOH__u64 (conn->ksnc_hdr.src_nid),
- NTOH__u64 (conn->ksnc_hdr.dest_nid),
- conn, le64_to_cpu(conn->ksnc_hdr.src_nid),
- le64_to_cpu(conn->ksnc_hdr.dest_nid),
-- conn->ksnc_rx_nob_left);
--
-- /* forward the packet. NB ksocknal_init_fmb() put fmb into
-- * conn->ksnc_cookie */
-- fmb = (ksock_fmb_t *)conn->ksnc_cookie;
-- kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
--
-- /* no slop in forwarded packets */
-- LASSERT (conn->ksnc_rx_nob_left == 0);
--
-- ksocknal_new_packet (conn, 0); /* on to next packet */
-- return (0); /* (later) */
--
-- default:
-- break;
-- }
--
-- /* Not Reached */
-- LBUG ();
-- return (-EINVAL); /* keep gcc happy */
--}
--
--ptl_err_t
- ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg,
-ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
-- unsigned int niov, struct iovec *iov,
-- size_t offset, size_t mlen, size_t rlen)
--{
-- ksock_conn_t *conn = (ksock_conn_t *)private;
--
-- LASSERT (mlen <= rlen);
-- LASSERT (niov <= PTL_MD_MAX_IOV);
--
-- conn->ksnc_cookie = msg;
-- conn->ksnc_rx_nob_wanted = mlen;
-- conn->ksnc_rx_nob_left = rlen;
--
-- conn->ksnc_rx_nkiov = 0;
-- conn->ksnc_rx_kiov = NULL;
-- conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
-- conn->ksnc_rx_niov =
-- lib_extract_iov(PTL_MD_MAX_IOV, conn->ksnc_rx_iov,
-- niov, iov, offset, mlen);
--
-- LASSERT (mlen ==
-- lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
-- lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
--
-- return (PTL_OK);
--}
--
--ptl_err_t
- ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg,
-ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
-- unsigned int niov, ptl_kiov_t *kiov,
-- size_t offset, size_t mlen, size_t rlen)
--{
-- ksock_conn_t *conn = (ksock_conn_t *)private;
--
-- LASSERT (mlen <= rlen);
-- LASSERT (niov <= PTL_MD_MAX_IOV);
--
-- conn->ksnc_cookie = msg;
-- conn->ksnc_rx_nob_wanted = mlen;
-- conn->ksnc_rx_nob_left = rlen;
--
-- conn->ksnc_rx_niov = 0;
-- conn->ksnc_rx_iov = NULL;
-- conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
-- conn->ksnc_rx_nkiov =
-- lib_extract_kiov(PTL_MD_MAX_IOV, conn->ksnc_rx_kiov,
-- niov, kiov, offset, mlen);
--
-- LASSERT (mlen ==
-- lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
-- lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
--
-- return (PTL_OK);
-}
-
-static inline int
-ksocknal_sched_cansleep(ksock_sched_t *sched)
-{
- unsigned long flags;
- int rc;
-
- spin_lock_irqsave(&sched->kss_lock, flags);
-
- rc = (!ksocknal_data.ksnd_shuttingdown &&
-#if SOCKNAL_ZC
- list_empty(&sched->kss_zctxdone_list) &&
-#endif
- list_empty(&sched->kss_rx_conns) &&
- list_empty(&sched->kss_tx_conns));
-
- spin_unlock_irqrestore(&sched->kss_lock, flags);
- return (rc);
--}
--
--int ksocknal_scheduler (void *arg)
--{
-- ksock_sched_t *sched = (ksock_sched_t *)arg;
-- ksock_conn_t *conn;
-- ksock_tx_t *tx;
-- unsigned long flags;
-- int rc;
-- int nloops = 0;
-- int id = sched - ksocknal_data.ksnd_schedulers;
-- char name[16];
--
-- snprintf (name, sizeof (name),"ksocknald_%02d", id);
-- kportal_daemonize (name);
-- kportal_blockallsigs ();
--
--#if (CONFIG_SMP && CPU_AFFINITY)
- id = ksocknal_sched2cpu(id);
-- if (cpu_online(id)) {
-- cpumask_t m;
-- cpu_set(id, m);
-- set_cpus_allowed(current, m);
-- } else {
- CERROR ("Can't set CPU affinity for %s\n", name);
- CERROR ("Can't set CPU affinity for %s to %d\n", name, id);
-- }
--#endif /* CONFIG_SMP && CPU_AFFINITY */
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- while (!ksocknal_data.ksnd_shuttingdown) {
-- int did_something = 0;
--
-- /* Ensure I progress everything semi-fairly */
--
-- if (!list_empty (&sched->kss_rx_conns)) {
-- conn = list_entry(sched->kss_rx_conns.next,
-- ksock_conn_t, ksnc_rx_list);
-- list_del(&conn->ksnc_rx_list);
--
-- LASSERT(conn->ksnc_rx_scheduled);
-- LASSERT(conn->ksnc_rx_ready);
--
-- /* clear rx_ready in case receive isn't complete.
-- * Do it BEFORE we call process_recv, since
-- * data_ready can set it any time after we release
-- * kss_lock. */
-- conn->ksnc_rx_ready = 0;
-- spin_unlock_irqrestore(&sched->kss_lock, flags);
--
-- rc = ksocknal_process_receive(conn);
--
-- spin_lock_irqsave(&sched->kss_lock, flags);
--
-- /* I'm the only one that can clear this flag */
-- LASSERT(conn->ksnc_rx_scheduled);
--
-- /* Did process_receive get everything it wanted? */
-- if (rc == 0)
-- conn->ksnc_rx_ready = 1;
--
-- if (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP ||
-- conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB) {
-- /* Conn blocked for a forwarding buffer.
-- * It will get queued for my attention when
-- * one becomes available (and it might just
-- * already have been!). Meanwhile my ref
-- * on it stays put. */
-- } else if (conn->ksnc_rx_ready) {
-- /* reschedule for rx */
-- list_add_tail (&conn->ksnc_rx_list,
-- &sched->kss_rx_conns);
-- } else {
-- conn->ksnc_rx_scheduled = 0;
-- /* drop my ref */
-- ksocknal_put_conn(conn);
-- }
--
-- did_something = 1;
-- }
--
-- if (!list_empty (&sched->kss_tx_conns)) {
-- conn = list_entry(sched->kss_tx_conns.next,
-- ksock_conn_t, ksnc_tx_list);
-- list_del (&conn->ksnc_tx_list);
--
-- LASSERT(conn->ksnc_tx_scheduled);
-- LASSERT(conn->ksnc_tx_ready);
-- LASSERT(!list_empty(&conn->ksnc_tx_queue));
--
-- tx = list_entry(conn->ksnc_tx_queue.next,
-- ksock_tx_t, tx_list);
-- /* dequeue now so empty list => more to send */
-- list_del(&tx->tx_list);
--
-- /* Clear tx_ready in case send isn't complete. Do
-- * it BEFORE we call process_transmit, since
-- * write_space can set it any time after we release
-- * kss_lock. */
-- conn->ksnc_tx_ready = 0;
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
--
-- rc = ksocknal_process_transmit(conn, tx);
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- if (rc == -ENOMEM || rc == -EAGAIN) {
-- /* Incomplete send: replace tx on HEAD of tx_queue */
-- list_add (&tx->tx_list, &conn->ksnc_tx_queue);
-- } else {
-- /* Complete send; assume space for more */
-- conn->ksnc_tx_ready = 1;
-- }
--
-- if (rc == -ENOMEM) {
-- /* Do nothing; after a short timeout, this
-- * conn will be reposted on kss_tx_conns. */
-- } else if (conn->ksnc_tx_ready &&
-- !list_empty (&conn->ksnc_tx_queue)) {
-- /* reschedule for tx */
-- list_add_tail (&conn->ksnc_tx_list,
-- &sched->kss_tx_conns);
-- } else {
-- conn->ksnc_tx_scheduled = 0;
-- /* drop my ref */
-- ksocknal_put_conn (conn);
-- }
--
-- did_something = 1;
-- }
--#if SOCKNAL_ZC
-- if (!list_empty (&sched->kss_zctxdone_list)) {
-- ksock_tx_t *tx =
-- list_entry(sched->kss_zctxdone_list.next,
-- ksock_tx_t, tx_list);
-- did_something = 1;
--
-- list_del (&tx->tx_list);
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
--
-- ksocknal_tx_done (tx, 1);
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
-- }
--#endif
-- if (!did_something || /* nothing to do */
-- ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
--
-- nloops = 0;
--
-- if (!did_something) { /* wait for something to do */
- #if SOCKNAL_ZC
- rc = wait_event_interruptible (sched->kss_waitq,
- ksocknal_data.ksnd_shuttingdown ||
- !list_empty(&sched->kss_rx_conns) ||
- !list_empty(&sched->kss_tx_conns) ||
- !list_empty(&sched->kss_zctxdone_list));
- #else
-- rc = wait_event_interruptible (sched->kss_waitq,
- ksocknal_data.ksnd_shuttingdown ||
- !list_empty(&sched->kss_rx_conns) ||
- !list_empty(&sched->kss_tx_conns));
- #endif
- !ksocknal_sched_cansleep(sched));
-- LASSERT (rc == 0);
-- } else
-- our_cond_resched();
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
-- }
-- }
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
-- ksocknal_thread_fini ();
-- return (0);
--}
--
--void
--ksocknal_data_ready (struct sock *sk, int n)
--{
-- unsigned long flags;
-- ksock_conn_t *conn;
-- ksock_sched_t *sched;
-- ENTRY;
--
-- /* interleave correctly with closing sockets... */
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- conn = sk->sk_user_data;
-- if (conn == NULL) { /* raced with ksocknal_terminate_conn */
-- LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
-- sk->sk_data_ready (sk, n);
-- } else {
-- sched = conn->ksnc_scheduler;
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- conn->ksnc_rx_ready = 1;
--
-- if (!conn->ksnc_rx_scheduled) { /* not being progressed */
-- list_add_tail(&conn->ksnc_rx_list,
-- &sched->kss_rx_conns);
-- conn->ksnc_rx_scheduled = 1;
-- /* extra ref for scheduler */
-- atomic_inc (&conn->ksnc_refcount);
--
-- wake_up (&sched->kss_waitq);
-- }
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
-- }
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--
-- EXIT;
--}
--
--void
--ksocknal_write_space (struct sock *sk)
--{
-- unsigned long flags;
-- ksock_conn_t *conn;
-- ksock_sched_t *sched;
--
-- /* interleave correctly with closing sockets... */
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- conn = sk->sk_user_data;
--
-- CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
-- sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn,
-- (conn == NULL) ? "" : (conn->ksnc_tx_ready ?
-- " ready" : " blocked"),
-- (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
-- " scheduled" : " idle"),
-- (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
-- " empty" : " queued"));
--
-- if (conn == NULL) { /* raced with ksocknal_terminate_conn */
-- LASSERT (sk->sk_write_space != &ksocknal_write_space);
-- sk->sk_write_space (sk);
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
-- return;
-- }
--
-- if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */
-- clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
--
-- sched = conn->ksnc_scheduler;
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- conn->ksnc_tx_ready = 1;
--
-- if (!conn->ksnc_tx_scheduled && // not being progressed
-- !list_empty(&conn->ksnc_tx_queue)){//packets to send
-- list_add_tail (&conn->ksnc_tx_list,
-- &sched->kss_tx_conns);
-- conn->ksnc_tx_scheduled = 1;
-- /* extra ref for scheduler */
-- atomic_inc (&conn->ksnc_refcount);
--
-- wake_up (&sched->kss_waitq);
-- }
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
-- }
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--}
--
--int
--ksocknal_sock_write (struct socket *sock, void *buffer, int nob)
--{
-- int rc;
-- mm_segment_t oldmm = get_fs();
--
-- while (nob > 0) {
-- struct iovec iov = {
-- .iov_base = buffer,
-- .iov_len = nob
-- };
-- struct msghdr msg = {
-- .msg_name = NULL,
-- .msg_namelen = 0,
-- .msg_iov = &iov,
-- .msg_iovlen = 1,
-- .msg_control = NULL,
-- .msg_controllen = 0,
-- .msg_flags = 0
-- };
--
-- set_fs (KERNEL_DS);
-- rc = sock_sendmsg (sock, &msg, iov.iov_len);
-- set_fs (oldmm);
--
-- if (rc < 0)
-- return (rc);
--
-- if (rc == 0) {
-- CERROR ("Unexpected zero rc\n");
-- return (-ECONNABORTED);
-- }
--
-- buffer = ((char *)buffer) + rc;
-- nob -= rc;
-- }
--
-- return (0);
--}
--
--int
--ksocknal_sock_read (struct socket *sock, void *buffer, int nob)
--{
-- int rc;
-- mm_segment_t oldmm = get_fs();
--
-- while (nob > 0) {
-- struct iovec iov = {
-- .iov_base = buffer,
-- .iov_len = nob
-- };
-- struct msghdr msg = {
-- .msg_name = NULL,
-- .msg_namelen = 0,
-- .msg_iov = &iov,
-- .msg_iovlen = 1,
-- .msg_control = NULL,
-- .msg_controllen = 0,
-- .msg_flags = 0
-- };
--
-- set_fs (KERNEL_DS);
-- rc = sock_recvmsg (sock, &msg, iov.iov_len, 0);
-- set_fs (oldmm);
--
-- if (rc < 0)
-- return (rc);
--
-- if (rc == 0)
-- return (-ECONNABORTED);
--
-- buffer = ((char *)buffer) + rc;
-- nob -= rc;
-- }
--
-- return (0);
--}
--
--int
- ksocknal_hello (struct socket *sock, ptl_nid_t *nid, int *type,
- __u64 *incarnation)
-ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
--{
- int rc;
- /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
- struct socket *sock = conn->ksnc_sock;
-- ptl_hdr_t hdr;
-- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
- char ipbuf[PTL_NALFMT_SIZE];
- char ipbuf2[PTL_NALFMT_SIZE];
- int i;
- int rc;
--
- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
- LASSERT (conn->ksnc_type != SOCKNAL_CONN_NONE);
- LASSERT (nipaddrs <= SOCKNAL_MAX_INTERFACES);
--
- memset (&hdr, 0, sizeof (hdr));
- hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
- hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
- hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
- /* No need for getconnsock/putconnsock */
- LASSERT (!conn->ksnc_closing);
--
- hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid);
- hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
- hmv->magic = cpu_to_le32 (PORTALS_PROTO_MAGIC);
- hmv->version_major = cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
- hmv->version_minor = cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
--
- hdr.msg.hello.type = __cpu_to_le32 (*type);
- hdr.src_nid = cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid);
- hdr.type = cpu_to_le32 (PTL_MSG_HELLO);
- hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs));
-
- hdr.msg.hello.type = cpu_to_le32 (conn->ksnc_type);
-- hdr.msg.hello.incarnation =
- __cpu_to_le64 (ksocknal_data.ksnd_incarnation);
- cpu_to_le64 (ksocknal_data.ksnd_incarnation);
--
- /* Assume sufficient socket buffering for this message */
- rc = ksocknal_sock_write (sock, &hdr, sizeof (hdr));
- /* Receiver is eager */
- rc = ksocknal_sock_write (sock, &hdr, sizeof(hdr));
-- if (rc != 0) {
- CERROR ("Error %d sending HELLO to "LPX64" %s\n",
- rc, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf));
- CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
-- return (rc);
- }
-
- if (nipaddrs == 0)
- return (0);
-
- for (i = 0; i < nipaddrs; i++) {
- ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]);
- }
-
- rc = ksocknal_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs));
- if (rc != 0)
- CERROR ("Error %d sending HELLO payload (%d)"
- " to %u.%u.%u.%u/%d\n", rc, nipaddrs,
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
- return (rc);
-}
-
-int
-ksocknal_invert_type(int type)
-{
- switch (type)
- {
- case SOCKNAL_CONN_ANY:
- case SOCKNAL_CONN_CONTROL:
- return (type);
- case SOCKNAL_CONN_BULK_IN:
- return SOCKNAL_CONN_BULK_OUT;
- case SOCKNAL_CONN_BULK_OUT:
- return SOCKNAL_CONN_BULK_IN;
- default:
- return (SOCKNAL_CONN_NONE);
-- }
-}
-
-int
-ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
- __u64 *incarnation, __u32 *ipaddrs)
-{
- struct socket *sock = conn->ksnc_sock;
- int rc;
- int nips;
- int i;
- int type;
- ptl_hdr_t hdr;
- ptl_magicversion_t *hmv;
-
- hmv = (ptl_magicversion_t *)&hdr.dest_nid;
- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
--
-- rc = ksocknal_sock_read (sock, hmv, sizeof (*hmv));
-- if (rc != 0) {
- CERROR ("Error %d reading HELLO from "LPX64" %s\n",
- rc, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf));
- CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
-- return (rc);
-- }
--
- if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64" %s\n",
- __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid,
- portals_nid2str(SOCKNAL, *nid, ipbuf));
- if (hmv->magic != le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from %u.%u.%u.%u\n",
- __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC,
- HIPQUAD(conn->ksnc_ipaddr));
-- return (-EPROTO);
-- }
--
- if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
- hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
- if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
- hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
-- CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
- " from "LPX64" %s\n",
- __le16_to_cpu (hmv->version_major),
- __le16_to_cpu (hmv->version_minor),
- " from %u.%u.%u.%u\n",
- le16_to_cpu (hmv->version_major),
- le16_to_cpu (hmv->version_minor),
-- PORTALS_PROTO_VERSION_MAJOR,
-- PORTALS_PROTO_VERSION_MINOR,
- *nid, portals_nid2str(SOCKNAL, *nid, ipbuf));
- HIPQUAD(conn->ksnc_ipaddr));
-- return (-EPROTO);
-- }
--
- #if (PORTALS_PROTO_VERSION_MAJOR != 0)
- # error "This code only understands protocol version 0.x"
-#if (PORTALS_PROTO_VERSION_MAJOR != 1)
-# error "This code only understands protocol version 1.x"
--#endif
- /* version 0 sends magic/version as the dest_nid of a 'hello' header,
- * so read the rest of it in now... */
- /* version 1 sends magic/version as the dest_nid of a 'hello'
- * header, followed by payload full of interface IP addresses.
- * Read the rest of it in now... */
--
-- rc = ksocknal_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv));
-- if (rc != 0) {
- CERROR ("Error %d reading rest of HELLO hdr from "LPX64" %s\n",
- rc, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf));
- CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
- rc, HIPQUAD(conn->ksnc_ipaddr));
-- return (rc);
-- }
--
-- /* ...and check we got what we expected */
- if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) ||
- hdr.payload_length != __cpu_to_le32 (0)) {
- CERROR ("Expecting a HELLO hdr with 0 payload,"
- " but got type %d with %d payload from "LPX64" %s\n",
- __le32_to_cpu (hdr.type),
- __le32_to_cpu (hdr.payload_length), *nid,
- portals_nid2str(SOCKNAL, *nid, ipbuf));
- if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
- CERROR ("Expecting a HELLO hdr,"
- " but got type %d from %u.%u.%u.%u\n",
- le32_to_cpu (hdr.type),
- HIPQUAD(conn->ksnc_ipaddr));
-- return (-EPROTO);
-- }
--
- if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
- CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
- if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
- CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY"
- "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
-- return (-EPROTO);
-- }
--
-- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
- *nid = __le64_to_cpu(hdr.src_nid);
- } else if (*nid != __le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid "LPX64" %s, but expecting "LPX64" %s\n",
- __le64_to_cpu (hdr.src_nid),
- portals_nid2str(SOCKNAL,
- __le64_to_cpu(hdr.src_nid),
- ipbuf),
- *nid, portals_nid2str(SOCKNAL, *nid, ipbuf2));
- *nid = le64_to_cpu(hdr.src_nid);
- } else if (*nid != le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid "LPX64"@%u.%u.%u.%u "
- "but expecting "LPX64"\n",
- le64_to_cpu (hdr.src_nid),
- HIPQUAD(conn->ksnc_ipaddr), *nid);
-- return (-EPROTO);
-- }
--
- if (*type == SOCKNAL_CONN_NONE) {
- type = __le32_to_cpu(hdr.msg.hello.type);
-
- if (conn->ksnc_type == SOCKNAL_CONN_NONE) {
-- /* I've accepted this connection; peer determines type */
- *type = __le32_to_cpu(hdr.msg.hello.type);
- switch (*type) {
- case SOCKNAL_CONN_ANY:
- case SOCKNAL_CONN_CONTROL:
- break;
- case SOCKNAL_CONN_BULK_IN:
- *type = SOCKNAL_CONN_BULK_OUT;
- break;
- case SOCKNAL_CONN_BULK_OUT:
- *type = SOCKNAL_CONN_BULK_IN;
- break;
- default:
- CERROR ("Unexpected type %d from "LPX64" %s\n",
- *type, *nid,
- portals_nid2str(SOCKNAL, *nid, ipbuf));
- conn->ksnc_type = ksocknal_invert_type(type);
- if (conn->ksnc_type == SOCKNAL_CONN_NONE) {
- CERROR ("Unexpected type %d from "LPX64"@%u.%u.%u.%u\n",
- type, *nid, HIPQUAD(conn->ksnc_ipaddr));
-- return (-EPROTO);
-- }
- } else if (__le32_to_cpu(hdr.msg.hello.type) != SOCKNAL_CONN_NONE) {
- CERROR ("Mismatched types: me %d "LPX64" %s %d\n",
- *type, *nid, portals_nid2str(SOCKNAL, *nid, ipbuf),
- __le32_to_cpu(hdr.msg.hello.type));
- } else if (ksocknal_invert_type(type) != conn->ksnc_type) {
- CERROR ("Mismatched types: me %d, "LPX64"@%u.%u.%u.%u %d\n",
- conn->ksnc_type, *nid, HIPQUAD(conn->ksnc_ipaddr),
- le32_to_cpu(hdr.msg.hello.type));
-- return (-EPROTO);
-- }
--
- *incarnation = __le64_to_cpu(hdr.msg.hello.incarnation);
- *incarnation = le64_to_cpu(hdr.msg.hello.incarnation);
--
- return (0);
- nips = __le32_to_cpu (hdr.payload_length) / sizeof (__u32);
-
- if (nips > SOCKNAL_MAX_INTERFACES ||
- nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) {
- CERROR("Bad payload length %d from "LPX64"@%u.%u.%u.%u\n",
- __le32_to_cpu (hdr.payload_length),
- *nid, HIPQUAD(conn->ksnc_ipaddr));
- }
-
- if (nips == 0)
- return (0);
-
- rc = ksocknal_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs));
- if (rc != 0) {
- CERROR ("Error %d reading IPs from "LPX64"@%u.%u.%u.%u\n",
- rc, *nid, HIPQUAD(conn->ksnc_ipaddr));
- return (rc);
- }
-
- for (i = 0; i < nips; i++) {
- ipaddrs[i] = __le32_to_cpu(ipaddrs[i]);
-
- if (ipaddrs[i] == 0) {
- CERROR("Zero IP[%d] from "LPX64"@%u.%u.%u.%u\n",
- i, *nid, HIPQUAD(conn->ksnc_ipaddr));
- return (-EPROTO);
- }
- }
-
- return (nips);
-}
-
-int
-ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
-{
- mm_segment_t oldmm = get_fs ();
- struct socket *sock = conn->ksnc_sock;
- int len;
- int rc;
-
- rc = ksocknal_getconnsock (conn);
- if (rc != 0) {
- LASSERT (conn->ksnc_closing);
- *txmem = *rxmem = *nagle = 0;
- return (-ESHUTDOWN);
- }
-
- set_fs (KERNEL_DS);
-
- len = sizeof(*txmem);
- rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)txmem, &len);
- if (rc == 0) {
- len = sizeof(*rxmem);
- rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF,
- (char *)rxmem, &len);
- }
- if (rc == 0) {
- len = sizeof(*nagle);
- rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
- (char *)nagle, &len);
- }
-
- set_fs (oldmm);
- ksocknal_putconnsock (conn);
-
- if (rc == 0)
- *nagle = !*nagle;
- else
- *txmem = *rxmem = *nagle = 0;
-
- return (rc);
--}
--
--int
--ksocknal_setup_sock (struct socket *sock)
--{
-- mm_segment_t oldmm = get_fs ();
-- int rc;
-- int option;
- int keep_idle;
- int keep_intvl;
- int keep_count;
- int do_keepalive;
-- struct linger linger;
--
- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-- sock->sk->sk_allocation = GFP_NOFS;
- #else
- sock->sk->allocation = GFP_NOFS;
- #endif
--
-- /* Ensure this socket aborts active sends immediately when we close
-- * it. */
--
-- linger.l_onoff = 0;
-- linger.l_linger = 0;
--
-- set_fs (KERNEL_DS);
-- rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
-- (char *)&linger, sizeof (linger));
-- set_fs (oldmm);
-- if (rc != 0) {
-- CERROR ("Can't set SO_LINGER: %d\n", rc);
-- return (rc);
-- }
--
-- option = -1;
-- set_fs (KERNEL_DS);
-- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
-- (char *)&option, sizeof (option));
-- set_fs (oldmm);
-- if (rc != 0) {
-- CERROR ("Can't set SO_LINGER2: %d\n", rc);
-- return (rc);
-- }
--
- #if SOCKNAL_USE_KEEPALIVES
- /* Keepalives: If 3/4 of the timeout elapses, start probing every
- * second until the timeout elapses. */
- if (!ksocknal_tunables.ksnd_nagle) {
- option = 1;
-
- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't disable nagle: %d\n", rc);
- return (rc);
- }
- }
-
- if (ksocknal_tunables.ksnd_buffer_size > 0) {
- option = ksocknal_tunables.ksnd_buffer_size;
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
- return (rc);
- }
--
- option = (ksocknal_data.ksnd_io_timeout * 3) / 4;
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
-
- /* snapshot tunables */
- keep_idle = ksocknal_tunables.ksnd_keepalive_idle;
- keep_count = ksocknal_tunables.ksnd_keepalive_count;
- keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl;
-
- do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
- option = (do_keepalive ? 1 : 0);
-- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&option, sizeof (option));
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&option, sizeof (option));
-- set_fs (oldmm);
-- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
- CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
-- return (rc);
-- }
-
- option = 1;
-
- if (!do_keepalive)
- return (0);
-
-- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&option, sizeof (option));
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keep_idle, sizeof (keep_idle));
-- set_fs (oldmm);
-- if (rc != 0) {
- CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
- CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
-- return (rc);
-- }
-
- option = ksocknal_data.ksnd_io_timeout / 4;
-
-- set_fs (KERNEL_DS);
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&option, sizeof (option));
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keep_intvl, sizeof (keep_intvl));
-- set_fs (oldmm);
-- if (rc != 0) {
-- CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
-- return (rc);
-- }
--
- option = 1;
-- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&option, sizeof (option));
- rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keep_count, sizeof (keep_count));
-- set_fs (oldmm);
-- if (rc != 0) {
- CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
- CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
-- return (rc);
-- }
- #endif
-
-- return (0);
--}
--
- static int
- ksocknal_connect_sock(struct socket **sockp, int *may_retry,
- ksock_route_t *route, int local_port)
-int
-ksocknal_connect_peer (ksock_route_t *route, int type)
--{
- struct sockaddr_in locaddr;
- struct sockaddr_in srvaddr;
- struct socket *sock;
- int rc;
- int option;
- struct sockaddr_in ipaddr;
-- mm_segment_t oldmm = get_fs();
-- struct timeval tv;
-
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr = INADDR_ANY;
-
- memset (&srvaddr, 0, sizeof (srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons (route->ksnr_port);
- srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
-
- *may_retry = 0;
-
- int fd;
- struct socket *sock;
- int rc;
-
-- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
-- if (rc != 0) {
-- CERROR ("Can't create autoconnect socket: %d\n", rc);
-- return (rc);
-- }
--
-- /* Ugh; have to map_fd for compatibility with sockets passed in
-- * from userspace. And we actually need the sock->file refcounting
-- * that this gives you :) */
--
- rc = sock_map_fd (sock);
- if (rc < 0) {
- fd = sock_map_fd (sock);
- if (fd < 0) {
-- sock_release (sock);
- CERROR ("sock_map_fd error %d\n", rc);
- return (rc);
- CERROR ("sock_map_fd error %d\n", fd);
- return (fd);
-- }
--
- /* NB the file descriptor (rc) now owns the ref on sock->file */
- /* NB the fd now owns the ref on sock->file */
-- LASSERT (sock->file != NULL);
- LASSERT (file_count(sock->file) == 1);
-
- get_file(sock->file); /* extra ref makes sock->file */
- sys_close(rc); /* survive this close */
-
- /* Still got a single ref on sock->file */
-- LASSERT (file_count(sock->file) == 1);
--
-- /* Set the socket timeouts, so our connection attempt completes in
-- * finite time */
- tv.tv_sec = ksocknal_data.ksnd_io_timeout;
- tv.tv_sec = ksocknal_tunables.ksnd_io_timeout;
-- tv.tv_usec = 0;
--
-- set_fs (KERNEL_DS);
-- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO,
-- (char *)&tv, sizeof (tv));
-- set_fs (oldmm);
-- if (rc != 0) {
- CERROR ("Can't set send timeout %d: %d\n",
- ksocknal_data.ksnd_io_timeout, rc);
- goto failed;
- CERROR ("Can't set send timeout %d: %d\n",
- ksocknal_tunables.ksnd_io_timeout, rc);
- goto out;
-- }
--
-- set_fs (KERNEL_DS);
-- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO,
-- (char *)&tv, sizeof (tv));
-- set_fs (oldmm);
-- if (rc != 0) {
-- CERROR ("Can't set receive timeout %d: %d\n",
- ksocknal_data.ksnd_io_timeout, rc);
- goto failed;
- ksocknal_tunables.ksnd_io_timeout, rc);
- goto out;
-- }
--
- option = 1;
- set_fs (KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- if (route->ksnr_buffer_size != 0) {
- option = route->ksnr_buffer_size;
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- route->ksnr_buffer_size, rc);
- goto failed;
- }
- if (route->ksnr_myipaddr != 0) {
- /* Bind to the local IP address */
- memset (&ipaddr, 0, sizeof (ipaddr));
- ipaddr.sin_family = AF_INET;
- ipaddr.sin_port = htons (0); /* ANY */
- ipaddr.sin_addr.s_addr = htonl(route->ksnr_myipaddr);
--
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- rc = sock->ops->bind (sock, (struct sockaddr *)&ipaddr,
- sizeof (ipaddr));
-- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- route->ksnr_buffer_size, rc);
- goto failed;
- CERROR ("Can't bind to local IP %u.%u.%u.%u: %d\n",
- HIPQUAD(route->ksnr_myipaddr), rc);
- goto out;
-- }
- }
-
- rc = sock->ops->bind(sock,
- (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *may_retry = 1;
- goto failed;
-- }
-
- memset (&ipaddr, 0, sizeof (ipaddr));
- ipaddr.sin_family = AF_INET;
- ipaddr.sin_port = htons (route->ksnr_port);
- ipaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
-
- rc = sock->ops->connect (sock, (struct sockaddr *)&ipaddr,
- sizeof (ipaddr), sock->file->f_flags);
-- if (rc != 0) {
- CERROR("Error trying to bind to reserved port %d: %d\n",
- local_port, rc);
- goto failed;
- CERROR ("Can't connect to nid "LPX64
- " local IP: %u.%u.%u.%u,"
- " remote IP: %u.%u.%u.%u/%d: %d\n",
- route->ksnr_peer->ksnp_nid,
- HIPQUAD(route->ksnr_myipaddr),
- HIPQUAD(route->ksnr_ipaddr),
- route->ksnr_port, rc);
- goto out;
-- }
-
- rc = sock->ops->connect(sock,
- (struct sockaddr *)&srvaddr, sizeof(srvaddr),
- sock->file->f_flags);
- if (rc == 0)
- return 0;
-
- /* EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port... */
- *may_retry = (rc == -EADDRNOTAVAIL);
-
- CDEBUG(*may_retry ? D_NET : D_ERROR,
- "Error %d connecting to %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(route->ksnr_ipaddr), route->ksnr_port);
-
- failed:
- fput(sock->file);
- return rc;
- }
-
- int
- ksocknal_connect_peer (ksock_route_t *route, int type)
- {
- struct socket *sock;
- int rc;
- int port;
- int may_retry;
-
- /* Iterate through reserved ports. When typed connections are
- * used, we will need to bind to multiple ports, but we only know
- * this at connect time. But, by that time we've already called
- * bind() so we need a new socket. */
-
- for (port = 1023; port > 512; --port) {
-
- rc = ksocknal_connect_sock(&sock, &may_retry, route, port);
--
- if (rc == 0) {
- rc = ksocknal_create_conn(route, sock,
- route->ksnr_irq_affinity, type);
- fput(sock->file);
- return rc;
- }
-
- if (!may_retry)
- return rc;
- rc = ksocknal_create_conn (route, sock, type);
- if (rc == 0) {
- /* Take an extra ref on sock->file to compensate for the
- * upcoming close which will lose fd's ref on it. */
- get_file (sock->file);
-- }
--
- CERROR("Out of ports trying to bind to a reserved port\n");
- return (-EADDRINUSE);
- out:
- sys_close (fd);
- return (rc);
--}
--
--void
--ksocknal_autoconnect (ksock_route_t *route)
--{
-- LIST_HEAD (zombies);
-- ksock_tx_t *tx;
-- ksock_peer_t *peer;
-- unsigned long flags;
-- int rc;
-- int type;
--
-- for (;;) {
-- for (type = 0; type < SOCKNAL_CONN_NTYPES; type++)
-- if ((route->ksnr_connecting & (1 << type)) != 0)
-- break;
-- LASSERT (type < SOCKNAL_CONN_NTYPES);
--
-- rc = ksocknal_connect_peer (route, type);
--
-- if (rc != 0)
-- break;
--
-- /* successfully autoconnected: create_conn did the
-- * route/conn binding and scheduled any blocked packets */
--
-- if (route->ksnr_connecting == 0) {
-- /* No more connections required */
-- return;
-- }
-- }
--
-- /* Connection attempt failed */
--
-- write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
--
-- peer = route->ksnr_peer;
-- route->ksnr_connecting = 0;
--
-- /* This is a retry rather than a new connection */
-- LASSERT (route->ksnr_retry_interval != 0);
-- route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-- route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2,
-- SOCKNAL_MAX_RECONNECT_INTERVAL);
--
-- if (!list_empty (&peer->ksnp_tx_queue) &&
-- ksocknal_find_connecting_route_locked (peer) == NULL) {
-- LASSERT (list_empty (&peer->ksnp_conns));
--
-- /* None of the connections that the blocked packets are
-- * waiting for have been successful. Complete them now... */
-- do {
-- tx = list_entry (peer->ksnp_tx_queue.next,
-- ksock_tx_t, tx_list);
-- list_del (&tx->tx_list);
-- list_add_tail (&tx->tx_list, &zombies);
-- } while (!list_empty (&peer->ksnp_tx_queue));
-- }
--
- /* make this route least-favourite for re-selection */
-#if 0 /* irrelevent with only eager routes */
-- if (!route->ksnr_deleted) {
- /* make this route least-favourite for re-selection */
-- list_del(&route->ksnr_list);
-- list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-- }
-
-#endif
-- write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
--
-- while (!list_empty (&zombies)) {
-- char ipbuf[PTL_NALFMT_SIZE];
-- char ipbuf2[PTL_NALFMT_SIZE];
-- tx = list_entry (zombies.next, ksock_tx_t, tx_list);
--
-- CERROR ("Deleting packet type %d len %d ("LPX64" %s->"LPX64" %s)\n",
- NTOH__u32 (tx->tx_hdr->type),
- NTOH__u32 (tx->tx_hdr->payload_length),
- NTOH__u64 (tx->tx_hdr->src_nid),
- le32_to_cpu (tx->tx_hdr->type),
- le32_to_cpu (tx->tx_hdr->payload_length),
- le64_to_cpu (tx->tx_hdr->src_nid),
-- portals_nid2str(SOCKNAL,
- NTOH__u64(tx->tx_hdr->src_nid),
- le64_to_cpu(tx->tx_hdr->src_nid),
-- ipbuf),
- NTOH__u64 (tx->tx_hdr->dest_nid),
- le64_to_cpu (tx->tx_hdr->dest_nid),
-- portals_nid2str(SOCKNAL,
- NTOH__u64(tx->tx_hdr->src_nid),
- le64_to_cpu(tx->tx_hdr->src_nid),
-- ipbuf2));
--
-- list_del (&tx->tx_list);
-- /* complete now */
-- ksocknal_tx_done (tx, 0);
-- }
--}
--
--int
--ksocknal_autoconnectd (void *arg)
--{
-- long id = (long)arg;
-- char name[16];
-- unsigned long flags;
-- ksock_route_t *route;
-- int rc;
--
-- snprintf (name, sizeof (name), "ksocknal_ad%02ld", id);
-- kportal_daemonize (name);
-- kportal_blockallsigs ();
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
--
-- while (!ksocknal_data.ksnd_shuttingdown) {
--
-- if (!list_empty (&ksocknal_data.ksnd_autoconnectd_routes)) {
-- route = list_entry (ksocknal_data.ksnd_autoconnectd_routes.next,
-- ksock_route_t, ksnr_connect_list);
-
-
-- list_del (&route->ksnr_connect_list);
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
--
-- ksocknal_autoconnect (route);
-- ksocknal_put_route (route);
--
- spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
- spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock,
- flags);
-- continue;
-- }
-
- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
--
- rc = wait_event_interruptible (ksocknal_data.ksnd_autoconnectd_waitq,
- ksocknal_data.ksnd_shuttingdown ||
- !list_empty (&ksocknal_data.ksnd_autoconnectd_routes));
- spin_unlock_irqrestore(&ksocknal_data.ksnd_autoconnectd_lock,
- flags);
--
- spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
- rc = wait_event_interruptible(ksocknal_data.ksnd_autoconnectd_waitq,
- ksocknal_data.ksnd_shuttingdown ||
- !list_empty(&ksocknal_data.ksnd_autoconnectd_routes));
-
- spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock, flags);
-- }
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
--
-- ksocknal_thread_fini ();
-- return (0);
--}
--
--ksock_conn_t *
--ksocknal_find_timed_out_conn (ksock_peer_t *peer)
--{
-- /* We're called with a shared lock on ksnd_global_lock */
-- ksock_conn_t *conn;
-- struct list_head *ctmp;
- ksock_sched_t *sched;
--
-- list_for_each (ctmp, &peer->ksnp_conns) {
-- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
- sched = conn->ksnc_scheduler;
--
-- /* Don't need the {get,put}connsock dance to deref ksnc_sock... */
-- LASSERT (!conn->ksnc_closing);
-
-
- if (conn->ksnc_sock->sk->sk_err != 0) {
- /* Something (e.g. failed keepalive) set the socket error */
- atomic_inc (&conn->ksnc_refcount);
- CERROR ("Socket error %d: "LPX64" %p %d.%d.%d.%d\n",
- conn->ksnc_sock->sk->sk_err, peer->ksnp_nid,
- conn, HIPQUAD(conn->ksnc_ipaddr));
- return (conn);
- }
-
-- if (conn->ksnc_rx_started &&
-- time_after_eq (jiffies, conn->ksnc_rx_deadline)) {
-- /* Timed out incomplete incoming message */
-- atomic_inc (&conn->ksnc_refcount);
-- CERROR ("Timed out RX from "LPX64" %p %d.%d.%d.%d\n",
- peer->ksnp_nid, conn, HIPQUAD(conn->ksnc_ipaddr));
- peer->ksnp_nid,conn,HIPQUAD(conn->ksnc_ipaddr));
-- return (conn);
-- }
-
-
-- if ((!list_empty (&conn->ksnc_tx_queue) ||
-- conn->ksnc_sock->sk->sk_wmem_queued != 0) &&
-- time_after_eq (jiffies, conn->ksnc_tx_deadline)) {
- /* Timed out messages queued for sending, or
- * messages buffered in the socket's send buffer */
- /* Timed out messages queued for sending or
- * buffered in the socket's send buffer */
-- atomic_inc (&conn->ksnc_refcount);
- CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n",
- peer->ksnp_nid,
- CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n",
- peer->ksnp_nid,
-- list_empty (&conn->ksnc_tx_queue) ? "" : "Q ",
-- conn->ksnc_sock->sk->sk_wmem_queued, conn,
-- HIPQUAD(conn->ksnc_ipaddr));
-- return (conn);
-- }
-- }
--
-- return (NULL);
--}
--
--void
--ksocknal_check_peer_timeouts (int idx)
--{
-- struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
-- struct list_head *ptmp;
-- ksock_peer_t *peer;
-- ksock_conn_t *conn;
--
-- again:
-- /* NB. We expect to have a look at all the peers and not find any
-- * connections to time out, so we just use a shared lock while we
-- * take a look... */
-- read_lock (&ksocknal_data.ksnd_global_lock);
--
-- list_for_each (ptmp, peers) {
-- peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
-- conn = ksocknal_find_timed_out_conn (peer);
--
-- if (conn != NULL) {
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--
-- CERROR ("Timeout out conn->"LPX64" ip %d.%d.%d.%d:%d\n",
-- peer->ksnp_nid,
-- HIPQUAD(conn->ksnc_ipaddr),
-- conn->ksnc_port);
-- ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
--
-- /* NB we won't find this one again, but we can't
-- * just proceed with the next peer, since we dropped
-- * ksnd_global_lock and it might be dead already! */
-- ksocknal_put_conn (conn);
-- goto again;
-- }
-- }
--
-- read_unlock (&ksocknal_data.ksnd_global_lock);
--}
--
--int
--ksocknal_reaper (void *arg)
--{
-- wait_queue_t wait;
-- unsigned long flags;
-- ksock_conn_t *conn;
-- ksock_sched_t *sched;
-- struct list_head enomem_conns;
-- int nenomem_conns;
-- int timeout;
-- int i;
-- int peer_index = 0;
-- unsigned long deadline = jiffies;
--
-- kportal_daemonize ("ksocknal_reaper");
-- kportal_blockallsigs ();
--
-- INIT_LIST_HEAD(&enomem_conns);
-- init_waitqueue_entry (&wait, current);
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
--
-- while (!ksocknal_data.ksnd_shuttingdown) {
--
-- if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) {
-- conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next,
-- ksock_conn_t, ksnc_list);
-- list_del (&conn->ksnc_list);
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
--
-- ksocknal_terminate_conn (conn);
-- ksocknal_put_conn (conn);
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-- continue;
-- }
--
-- if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) {
-- conn = list_entry (ksocknal_data.ksnd_zombie_conns.next,
-- ksock_conn_t, ksnc_list);
-- list_del (&conn->ksnc_list);
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
--
-- ksocknal_destroy_conn (conn);
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-- continue;
-- }
--
-- if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) {
-- list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns);
-- list_del_init(&ksocknal_data.ksnd_enomem_conns);
-- }
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
--
-- /* reschedule all the connections that stalled with ENOMEM... */
-- nenomem_conns = 0;
-- while (!list_empty (&enomem_conns)) {
-- conn = list_entry (enomem_conns.next,
-- ksock_conn_t, ksnc_tx_list);
-- list_del (&conn->ksnc_tx_list);
--
-- sched = conn->ksnc_scheduler;
--
-- spin_lock_irqsave (&sched->kss_lock, flags);
--
-- LASSERT (conn->ksnc_tx_scheduled);
-- conn->ksnc_tx_ready = 1;
-- list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
-- wake_up (&sched->kss_waitq);
--
-- spin_unlock_irqrestore (&sched->kss_lock, flags);
-- nenomem_conns++;
-- }
--
-- /* careful with the jiffy wrap... */
-- while ((timeout = (int)(deadline - jiffies)) <= 0) {
-- const int n = 4;
-- const int p = 1;
-- int chunk = ksocknal_data.ksnd_peer_hash_size;
--
-- /* Time to check for timeouts on a few more peers: I do
-- * checks every 'p' seconds on a proportion of the peer
-- * table and I need to check every connection 'n' times
-- * within a timeout interval, to ensure I detect a
-- * timeout on any connection within (n+1)/n times the
-- * timeout interval. */
--
- if (ksocknal_data.ksnd_io_timeout > n * p)
- if (ksocknal_tunables.ksnd_io_timeout > n * p)
-- chunk = (chunk * n * p) /
- ksocknal_data.ksnd_io_timeout;
- ksocknal_tunables.ksnd_io_timeout;
-- if (chunk == 0)
-- chunk = 1;
--
-- for (i = 0; i < chunk; i++) {
-- ksocknal_check_peer_timeouts (peer_index);
-- peer_index = (peer_index + 1) %
-- ksocknal_data.ksnd_peer_hash_size;
-- }
--
-- deadline += p * HZ;
-- }
--
-- if (nenomem_conns != 0) {
-- /* Reduce my timeout if I rescheduled ENOMEM conns.
-- * This also prevents me getting woken immediately
-- * if any go back on my enomem list. */
-- timeout = SOCKNAL_ENOMEM_RETRY;
-- }
-- ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
--
- add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
-- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
--
-- if (!ksocknal_data.ksnd_shuttingdown &&
-- list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
-- list_empty (&ksocknal_data.ksnd_zombie_conns))
-- schedule_timeout (timeout);
--
-- set_current_state (TASK_RUNNING);
-- remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
--
-- spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-- }
--
-- spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
--
-- ksocknal_thread_fini ();
-- return (0);
--}
--
- nal_cb_t ksocknal_lib = {
- nal_data: &ksocknal_data, /* NAL private data */
- cb_send: ksocknal_send,
- cb_send_pages: ksocknal_send_pages,
- cb_recv: ksocknal_recv,
- cb_recv_pages: ksocknal_recv_pages,
- cb_read: ksocknal_read,
- cb_write: ksocknal_write,
- cb_malloc: ksocknal_malloc,
- cb_free: ksocknal_free,
- cb_printf: ksocknal_printf,
- cb_cli: ksocknal_cli,
- cb_sti: ksocknal_sti,
- cb_dist: ksocknal_dist
-lib_nal_t ksocknal_lib = {
- libnal_data: &ksocknal_data, /* NAL private data */
- libnal_send: ksocknal_send,
- libnal_send_pages: ksocknal_send_pages,
- libnal_recv: ksocknal_recv,
- libnal_recv_pages: ksocknal_recv_pages,
- libnal_dist: ksocknal_dist
--};
+++ /dev/null
--.deps
--Makefile
--link-stamp
--.*.cmd
--autoMakefile.in
--autoMakefile
- sources
--*.ko
--*.mod.c
--.*.flags
--.tmp_versions
--.depend
+++ /dev/null
- MODULES = portals
-MODULES = libcfs
--libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o
-
- api-sources := $(wildcard @LUSTRE@/portals/portals/api-*.c)
- lib-sources := $(wildcard @LUSTRE@/portals/portals/lib-*.c)
-
- portals-objs += $(libcfs-objs) $(patsubst %.c,%.o,$(notdir $(api-sources) $(lib-sources)))
--
--@INCLUDE_RULES@
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--include fs/lustre/portals/Kernelenv
--
--obj-y += libcfs.o
--libcfs-objs := module.o proc.o debug.o lwt.o tracefile.o
+++ /dev/null
--# Copyright (C) 2001, 2002 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--if MODULES
- modulenet_DATA := portals$(KMODEXT)
-modulenet_DATA := libcfs$(KMODEXT)
--endif
-
- sources:
- rm -f sources
- @for i in $(api-sources) $(lib-sources) ; do \
- echo ln -sf $$i . ; \
- ln -sf $$i . || exit 1 ; \
- done
- touch sources
--
--MOSTLYCLEANFILES = *.o *.ko *.mod.c
- CLEANFILES = sources lib-*.c api-*.c
- DIST_SOURCES = $(libcfs-objs:%.o=%.c) *.h
-DIST_SOURCES = $(libcfs-objs:%.o=%.c) tracefile.h
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- * Author: Phil Schwan <phil@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include <linux/config.h>
--#include <linux/module.h>
--#include <linux/kmod.h>
--#include <linux/notifier.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/stat.h>
--#include <linux/errno.h>
--#include <linux/smp_lock.h>
--#include <linux/unistd.h>
--#include <linux/interrupt.h>
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <linux/completion.h>
--
--#include <linux/fs.h>
--#include <linux/stat.h>
--#include <asm/uaccess.h>
--#include <asm/segment.h>
--#include <linux/miscdevice.h>
--#include <linux/version.h>
--
--# define DEBUG_SUBSYSTEM S_PORTALS
--
--#include <linux/kp30.h>
--#include <linux/portals_compat25.h>
--#include <linux/libcfs.h>
--
--#include "tracefile.h"
--
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--#include <linux/kallsyms.h>
--#endif
--
--unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL |
- S_GMNAL | S_IBNAL);
- S_GMNAL | S_OPENIBNAL);
--EXPORT_SYMBOL(portal_subsystem_debug);
--
--unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
-- D_RPCTRACE | D_VFSTRACE);
--EXPORT_SYMBOL(portal_debug);
--
--unsigned int portal_printk;
--EXPORT_SYMBOL(portal_printk);
--
--unsigned int portal_stack;
--EXPORT_SYMBOL(portal_stack);
--
--#ifdef __KERNEL__
--atomic_t portal_kmemory = ATOMIC_INIT(0);
--EXPORT_SYMBOL(portal_kmemory);
--#endif
--
--static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq);
--
--char debug_file_path[1024] = "/tmp/lustre-log";
--static char debug_file_name[1024];
--static int handled_panic; /* to avoid recursive calls to notifiers */
--char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall";
--
--int portals_do_debug_dumplog(void *arg)
--{
-- void *journal_info;
--
-- kportal_daemonize("");
--
-- reparent_to_init();
-- journal_info = current->journal_info;
-- current->journal_info = NULL;
--
-- snprintf(debug_file_name, sizeof(debug_file_path) - 1,
-- "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg);
- printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name);
-- tracefile_dump_all_pages(debug_file_name);
--
-- current->journal_info = journal_info;
-- wake_up(&debug_ctlwq);
-- return 0;
--}
--
--void portals_debug_dumplog(void)
--{
-- int rc;
-- DECLARE_WAITQUEUE(wait, current);
-- ENTRY;
--
-- /* we're being careful to ensure that the kernel thread is
-- * able to set our state to running as it exits before we
-- * get to schedule() */
-- set_current_state(TASK_INTERRUPTIBLE);
-- add_wait_queue(&debug_ctlwq, &wait);
--
-- rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid,
-- CLONE_VM | CLONE_FS | CLONE_FILES);
-- if (rc < 0)
-- printk(KERN_ERR "LustreError: cannot start log dump thread: "
-- "%d\n", rc);
-- else
-- schedule();
--
-- /* be sure to teardown if kernel_thread() failed */
-- remove_wait_queue(&debug_ctlwq, &wait);
-- set_current_state(TASK_RUNNING);
--}
--
--static int panic_dumplog(struct notifier_block *self, unsigned long unused1,
-- void *unused2)
--{
-- if (handled_panic)
-- return 0;
-- else
-- handled_panic = 1;
--
-- if (in_interrupt()) {
-- trace_debug_print();
-- return 0;
-- }
--
-- while (current->lock_depth >= 0)
-- unlock_kernel();
-- portals_debug_dumplog();
-- return 0;
--}
--
--static struct notifier_block lustre_panic_notifier = {
-- notifier_call : panic_dumplog,
-- next : NULL,
-- priority : 10000
--};
--
--int portals_debug_init(unsigned long bufsize)
--{
-- notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
-- return tracefile_init();
--}
--
--int portals_debug_cleanup(void)
--{
-- tracefile_exit();
-- notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier);
-- return 0;
--}
--
--int portals_debug_clear_buffer(void)
--{
-- trace_flush_pages();
-- return 0;
--}
--
--/* Debug markers, although printed by S_PORTALS
-- * should not be be marked as such. */
--#undef DEBUG_SUBSYSTEM
--#define DEBUG_SUBSYSTEM S_UNDEFINED
--int portals_debug_mark_buffer(char *text)
--{
-- CDEBUG(D_TRACE,"***************************************************\n");
- CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text);
- CWARN("DEBUG MARKER: %s\n", text);
-- CDEBUG(D_TRACE,"***************************************************\n");
--
-- return 0;
--}
--#undef DEBUG_SUBSYSTEM
--#define DEBUG_SUBSYSTEM S_PORTALS
--
--void portals_debug_set_level(unsigned int debug_level)
--{
- printk("Lustre: Setting portals debug level to %08x\n", debug_level);
- printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n",
- debug_level);
-- portal_debug = debug_level;
--}
--
--void portals_run_upcall(char **argv)
--{
-- int rc;
-- int argc;
-- char *envp[] = {
-- "HOME=/",
-- "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-- NULL};
-- ENTRY;
--
-- argv[0] = portals_upcall;
-- argc = 1;
-- while (argv[argc] != NULL)
-- argc++;
--
-- LASSERT(argc >= 2);
--
-- rc = USERMODEHELPER(argv[0], argv, envp);
-- if (rc < 0) {
-- CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; "
-- "check /proc/sys/portals/upcall\n",
-- rc, argv[0], argv[1],
-- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
-- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
-- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
-- argc < 6 ? "" : ",...");
-- } else {
-- CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n",
-- argv[0], argv[1],
-- argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
-- argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
-- argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
-- argc < 6 ? "" : ",...");
-- }
--}
--
--void portals_run_lbug_upcall(char *file, const char *fn, const int line)
--{
-- char *argv[6];
-- char buf[32];
--
-- ENTRY;
-- snprintf (buf, sizeof buf, "%d", line);
--
-- argv[1] = "LBUG";
-- argv[2] = file;
-- argv[3] = (char *)fn;
-- argv[4] = buf;
-- argv[5] = NULL;
--
-- portals_run_upcall (argv);
--}
--
--char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
--{
- if (nid == PTL_NID_ANY) {
- snprintf(str, PTL_NALFMT_SIZE - 1, "%s",
- "PTL_NID_ANY");
- return str;
- }
-
-- switch(nal){
--/* XXX this could be a nal method of some sort, 'cept it's config
-- * dependent whether (say) socknal NIDs are actually IP addresses... */
--#ifndef CRAY_PORTALS
-- case TCPNAL:
-- /* userspace NAL */
- case OPENIBNAL:
-- case SOCKNAL:
-- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
-- (__u32)(nid >> 32), HIPQUAD(nid));
-- break;
-- case QSWNAL:
-- case GMNAL:
- case IBNAL:
- case SCIMACNAL:
-- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
-- (__u32)(nid >> 32), (__u32)nid);
-- break;
--#endif
-- default:
-- snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
-- nal, (long long)nid);
- break;
- }
- return str;
-}
-/* bug #4615 */
-char *portals_id2str(int nal, ptl_process_id_t id, char *str)
-{
- switch(nal){
-#ifndef CRAY_PORTALS
- case TCPNAL:
- /* userspace NAL */
- case OPENIBNAL:
- case SOCKNAL:
- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
- (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
- break;
- case QSWNAL:
- case GMNAL:
- snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
- (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
- break;
-#endif
- default:
- snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
- nal, (long long)id.nid, (long)id.pid );
- break;
-- }
-- return str;
--}
-
--
--#ifdef __KERNEL__
--char stack_backtrace[LUSTRE_TRACE_SIZE];
--spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED;
--
--#if defined(__arch_um__)
--
--char *portals_debug_dumpstack(void)
--{
-- asm("int $3");
-- return "dump stack\n";
--}
--
--#elif defined(__i386__)
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--extern int lookup_symbol(unsigned long address, char *buf, int buflen);
--const char *kallsyms_lookup(unsigned long addr,
-- unsigned long *symbolsize,
-- unsigned long *offset,
-- char **modname, char *namebuf)
--{
-- int rc = lookup_symbol(addr, namebuf, 128);
-- if (rc == -ENOSYS)
-- return NULL;
-- return namebuf;
--}
--#endif
--
--char *portals_debug_dumpstack(void)
--{
-- unsigned long esp = current->thread.esp, addr;
-- unsigned long *stack = (unsigned long *)&esp;
-- char *buf = stack_backtrace, *pbuf = buf;
-- int size;
--
-- /* User space on another CPU? */
-- if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){
-- buf[0] = '\0';
-- goto out;
-- }
--
-- size = sprintf(pbuf, " Call Trace: ");
-- pbuf += size;
-- while (((long) stack & (THREAD_SIZE - 1)) != 0) {
-- addr = *stack++;
-- if (kernel_text_address(addr)) {
-- const char *sym_name;
-- char *modname, buffer[128];
-- unsigned long junk, offset;
--
-- sym_name = kallsyms_lookup(addr, &junk, &offset,
-- &modname, buffer);
-- if (sym_name == NULL) {
-- if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12)
-- break;
-- size = sprintf(pbuf, "[<%08lx>] ", addr);
-- } else {
-- if (buf + LUSTRE_TRACE_SIZE
-- /* fix length + sizeof('\0') */
-- <= pbuf + strlen(buffer) + 28 + 1)
-- break;
-- size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ",
-- addr, buffer, stack - 1);
-- }
-- pbuf += size;
-- }
-- }
--out:
-- return buf;
--}
--
--#else /* !__arch_um__ && !__i386__ */
--
--char *portals_debug_dumpstack(void)
--{
- return "dump_stack\n";
- char *buf = stack_backtrace;
- buf[0] = '\0';
- return buf;
--}
--
--#endif /* __arch_um__ */
--struct task_struct *portals_current(void)
--{
-- CWARN("current task struct is %p\n", current);
-- return current;
--}
--
--EXPORT_SYMBOL(stack_backtrace_lock);
--EXPORT_SYMBOL(portals_debug_dumpstack);
--EXPORT_SYMBOL(portals_current);
--#endif /* __KERNEL__ */
--
--EXPORT_SYMBOL(portals_debug_dumplog);
--EXPORT_SYMBOL(portals_debug_set_level);
--EXPORT_SYMBOL(portals_run_upcall);
--EXPORT_SYMBOL(portals_run_lbug_upcall);
--EXPORT_SYMBOL(portals_nid2str);
-EXPORT_SYMBOL(portals_id2str);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2003 Cluster File Systems, Inc.
-- * Author: Eric Barton <eeb@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include <linux/config.h>
--#include <linux/module.h>
--#include <linux/kmod.h>
--#include <linux/kernel.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/stat.h>
--#include <linux/errno.h>
--#include <linux/smp_lock.h>
--#include <linux/unistd.h>
--#include <linux/interrupt.h>
--#include <asm/system.h>
--#include <asm/uaccess.h>
--
--#define DEBUG_SUBSYSTEM S_PORTALS
--
--#include <linux/kp30.h>
--
--#if LWT_SUPPORT
--
-#if !KLWT_SUPPORT
--int lwt_enabled;
-lwt_cpu_t lwt_cpus[NR_CPUS];
-#endif
-
--int lwt_pages_per_cpu;
- lwt_cpu_t lwt_cpus[LWT_MAX_CPUS];
--
--/* NB only root is allowed to retrieve LWT info; it's an open door into the
-- * kernel... */
--
--int
--lwt_lookup_string (int *size, char *knl_ptr,
-- char *user_ptr, int user_size)
--{
-- int maxsize = 128;
--
-- /* knl_ptr was retrieved from an LWT snapshot and the caller wants to
-- * turn it into a string. NB we can crash with an access violation
-- * trying to determine the string length, so we're trusting our
-- * caller... */
--
-- if (!capable(CAP_SYS_ADMIN))
-- return (-EPERM);
--
-- if (user_size > 0 &&
-- maxsize > user_size)
-- maxsize = user_size;
--
-- *size = strnlen (knl_ptr, maxsize - 1) + 1;
--
-- if (user_ptr != NULL) {
-- if (user_size < 4)
-- return (-EINVAL);
--
-- if (copy_to_user (user_ptr, knl_ptr, *size))
-- return (-EFAULT);
--
-- /* Did I truncate the string? */
-- if (knl_ptr[*size - 1] != 0)
-- copy_to_user (user_ptr + *size - 4, "...", 4);
-- }
--
-- return (0);
--}
--
--int
--lwt_control (int enable, int clear)
--{
-- lwt_page_t *p;
-- int i;
-- int j;
--
-- if (!capable(CAP_SYS_ADMIN))
-- return (-EPERM);
--
- if (clear)
- for (i = 0; i < num_online_cpus(); i++) {
- p = lwt_cpus[i].lwtc_current_page;
- if (!enable) {
- LWT_EVENT(0,0,0,0);
- lwt_enabled = 0;
- mb();
- /* give people some time to stop adding traces */
- schedule_timeout(10);
- }
--
- for (j = 0; j < lwt_pages_per_cpu; j++) {
- memset (p->lwtp_events, 0, PAGE_SIZE);
- for (i = 0; i < num_online_cpus(); i++) {
- p = lwt_cpus[i].lwtc_current_page;
--
- p = list_entry (p->lwtp_list.next,
- lwt_page_t, lwtp_list);
- }
- if (p == NULL)
- return (-ENODATA);
-
- if (!clear)
- continue;
-
- for (j = 0; j < lwt_pages_per_cpu; j++) {
- memset (p->lwtp_events, 0, PAGE_SIZE);
-
- p = list_entry (p->lwtp_list.next,
- lwt_page_t, lwtp_list);
- }
-- }
--
- lwt_enabled = enable;
- mb();
- if (!enable) {
- /* give people some time to stop adding traces */
- schedule_timeout(10);
- if (enable) {
- lwt_enabled = 1;
- mb();
- LWT_EVENT(0,0,0,0);
-- }
--
-- return (0);
--}
--
--int
--lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
-- void *user_ptr, int user_size)
--{
-- const int events_per_page = PAGE_SIZE / sizeof(lwt_event_t);
-- const int bytes_per_page = events_per_page * sizeof(lwt_event_t);
-- lwt_page_t *p;
-- int i;
-- int j;
--
-- if (!capable(CAP_SYS_ADMIN))
-- return (-EPERM);
--
-- *ncpu = num_online_cpus();
-- *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
-- *now = get_cycles();
--
-- if (user_ptr == NULL)
-- return (0);
--
-- for (i = 0; i < num_online_cpus(); i++) {
-- p = lwt_cpus[i].lwtc_current_page;
-
- if (p == NULL)
- return (-ENODATA);
--
-- for (j = 0; j < lwt_pages_per_cpu; j++) {
-- if (copy_to_user(user_ptr, p->lwtp_events,
-- bytes_per_page))
-- return (-EFAULT);
--
-- user_ptr = ((char *)user_ptr) + bytes_per_page;
-- p = list_entry(p->lwtp_list.next,
-- lwt_page_t, lwtp_list);
--
-- }
-- }
--
-- return (0);
--}
--
--int
--lwt_init ()
--{
-- int i;
-- int j;
-
- for (i = 0; i < num_online_cpus(); i++)
- if (lwt_cpus[i].lwtc_current_page != NULL)
- return (-EALREADY);
--
- if (num_online_cpus() > LWT_MAX_CPUS) {
- CERROR ("Too many CPUs\n");
- return (-EINVAL);
- }
- LASSERT (!lwt_enabled);
--
-- /* NULL pointers, zero scalars */
-- memset (lwt_cpus, 0, sizeof (lwt_cpus));
-- lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * PAGE_SIZE);
--
-- for (i = 0; i < num_online_cpus(); i++)
-- for (j = 0; j < lwt_pages_per_cpu; j++) {
-- struct page *page = alloc_page (GFP_KERNEL);
-- lwt_page_t *lwtp;
--
-- if (page == NULL) {
-- CERROR ("Can't allocate page\n");
-- lwt_fini ();
-- return (-ENOMEM);
-- }
--
-- PORTAL_ALLOC(lwtp, sizeof (*lwtp));
-- if (lwtp == NULL) {
-- CERROR ("Can't allocate lwtp\n");
-- __free_page(page);
-- lwt_fini ();
-- return (-ENOMEM);
-- }
--
-- lwtp->lwtp_page = page;
-- lwtp->lwtp_events = page_address(page);
-- memset (lwtp->lwtp_events, 0, PAGE_SIZE);
--
-- if (j == 0) {
-- INIT_LIST_HEAD (&lwtp->lwtp_list);
-- lwt_cpus[i].lwtc_current_page = lwtp;
-- } else {
-- list_add (&lwtp->lwtp_list,
-- &lwt_cpus[i].lwtc_current_page->lwtp_list);
-- }
-- }
--
-- lwt_enabled = 1;
-- mb();
-
- LWT_EVENT(0,0,0,0);
--
-- return (0);
--}
--
--void
--lwt_fini ()
--{
-- int i;
-
- if (num_online_cpus() > LWT_MAX_CPUS)
- return;
--
- lwt_control(0, 0);
-
-- for (i = 0; i < num_online_cpus(); i++)
-- while (lwt_cpus[i].lwtc_current_page != NULL) {
-- lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page;
--
-- if (list_empty (&lwtp->lwtp_list)) {
-- lwt_cpus[i].lwtc_current_page = NULL;
-- } else {
-- lwt_cpus[i].lwtc_current_page =
-- list_entry (lwtp->lwtp_list.next,
-- lwt_page_t, lwtp_list);
--
-- list_del (&lwtp->lwtp_list);
-- }
--
-- __free_page (lwtp->lwtp_page);
-- PORTAL_FREE (lwtp, sizeof (*lwtp));
-- }
--}
--
--EXPORT_SYMBOL(lwt_enabled);
--EXPORT_SYMBOL(lwt_cpus);
--
--EXPORT_SYMBOL(lwt_init);
--EXPORT_SYMBOL(lwt_fini);
--EXPORT_SYMBOL(lwt_lookup_string);
--EXPORT_SYMBOL(lwt_control);
--EXPORT_SYMBOL(lwt_snapshot);
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--#define DEBUG_SUBSYSTEM S_PORTALS
--
--#include <linux/config.h>
--#include <linux/module.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/stat.h>
--#include <linux/init.h>
--#include <linux/errno.h>
--#include <linux/smp_lock.h>
--#include <linux/unistd.h>
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--
--#include <linux/fs.h>
--#include <linux/stat.h>
--#include <asm/uaccess.h>
--#include <asm/segment.h>
--#include <linux/miscdevice.h>
--
--#include <portals/lib-p30.h>
--#include <portals/p30.h>
--#include <linux/kp30.h>
- #include <linux/kpr.h>
--#include <linux/portals_compat25.h>
--
--#define PORTAL_MINOR 240
-
- extern void (kping_client)(struct portal_ioctl_data *);
--
--struct nal_cmd_handler {
- nal_cmd_handler_t nch_handler;
- void * nch_private;
- int nch_number;
- nal_cmd_handler_fn *nch_handler;
- void *nch_private;
--};
--
- static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1];
-static struct nal_cmd_handler nal_cmd[16];
--static DECLARE_MUTEX(nal_cmd_sem);
--
--#ifdef PORTAL_DEBUG
--void kportal_assertion_failed(char *expr, char *file, const char *func,
-- const int line)
--{
-- portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
-- "ASSERTION(%s) failed\n", expr);
-- LBUG_WITH_LOC(file, func, line);
--}
--#endif
--
--void
--kportal_daemonize (char *str)
--{
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
-- daemonize(str);
--#else
-- daemonize();
-- snprintf (current->comm, sizeof (current->comm), "%s", str);
--#endif
--}
--
--void
--kportal_memhog_free (struct portals_device_userstate *pdu)
--{
-- struct page **level0p = &pdu->pdu_memhog_root_page;
-- struct page **level1p;
-- struct page **level2p;
-- int count1;
-- int count2;
--
-- if (*level0p != NULL) {
--
-- level1p = (struct page **)page_address(*level0p);
-- count1 = 0;
--
-- while (count1 < PAGE_SIZE/sizeof(struct page *) &&
-- *level1p != NULL) {
--
-- level2p = (struct page **)page_address(*level1p);
-- count2 = 0;
--
-- while (count2 < PAGE_SIZE/sizeof(struct page *) &&
-- *level2p != NULL) {
--
-- __free_page(*level2p);
-- pdu->pdu_memhog_pages--;
-- level2p++;
-- count2++;
-- }
--
-- __free_page(*level1p);
-- pdu->pdu_memhog_pages--;
-- level1p++;
-- count1++;
-- }
--
-- __free_page(*level0p);
-- pdu->pdu_memhog_pages--;
--
-- *level0p = NULL;
-- }
--
-- LASSERT (pdu->pdu_memhog_pages == 0);
--}
--
--int
--kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
--{
-- struct page **level0p;
-- struct page **level1p;
-- struct page **level2p;
-- int count1;
-- int count2;
--
-- LASSERT (pdu->pdu_memhog_pages == 0);
-- LASSERT (pdu->pdu_memhog_root_page == NULL);
--
-- if (npages < 0)
-- return -EINVAL;
--
-- if (npages == 0)
-- return 0;
--
-- level0p = &pdu->pdu_memhog_root_page;
-- *level0p = alloc_page(flags);
-- if (*level0p == NULL)
-- return -ENOMEM;
-- pdu->pdu_memhog_pages++;
--
-- level1p = (struct page **)page_address(*level0p);
-- count1 = 0;
-- memset(level1p, 0, PAGE_SIZE);
--
-- while (pdu->pdu_memhog_pages < npages &&
-- count1 < PAGE_SIZE/sizeof(struct page *)) {
--
-- if (signal_pending(current))
-- return (-EINTR);
--
-- *level1p = alloc_page(flags);
-- if (*level1p == NULL)
-- return -ENOMEM;
-- pdu->pdu_memhog_pages++;
--
-- level2p = (struct page **)page_address(*level1p);
-- count2 = 0;
-- memset(level2p, 0, PAGE_SIZE);
--
-- while (pdu->pdu_memhog_pages < npages &&
-- count2 < PAGE_SIZE/sizeof(struct page *)) {
--
-- if (signal_pending(current))
-- return (-EINTR);
--
-- *level2p = alloc_page(flags);
-- if (*level2p == NULL)
-- return (-ENOMEM);
-- pdu->pdu_memhog_pages++;
--
-- level2p++;
-- count2++;
-- }
--
-- level1p++;
-- count1++;
-- }
--
-- return 0;
--}
--
--void
--kportal_blockallsigs ()
--{
-- unsigned long flags;
--
-- SIGNAL_MASK_LOCK(current, flags);
-- sigfillset(¤t->blocked);
-- RECALC_SIGPENDING;
-- SIGNAL_MASK_UNLOCK(current, flags);
--}
--
--/* called when opening /dev/device */
- static int kportal_psdev_open(struct inode * inode, struct file * file)
-static int libcfs_psdev_open(struct inode * inode, struct file * file)
--{
-- struct portals_device_userstate *pdu;
-- ENTRY;
--
-- if (!inode)
-- RETURN(-EINVAL);
--
-- PORTAL_MODULE_USE;
--
-- PORTAL_ALLOC(pdu, sizeof(*pdu));
-- if (pdu != NULL) {
-- pdu->pdu_memhog_pages = 0;
-- pdu->pdu_memhog_root_page = NULL;
-- }
-- file->private_data = pdu;
--
-- RETURN(0);
--}
--
--/* called when closing /dev/device */
- static int kportal_psdev_release(struct inode * inode, struct file * file)
-static int libcfs_psdev_release(struct inode * inode, struct file * file)
--{
-- struct portals_device_userstate *pdu;
-- ENTRY;
--
-- if (!inode)
-- RETURN(-EINVAL);
--
-- pdu = file->private_data;
-- if (pdu != NULL) {
-- kportal_memhog_free(pdu);
-- PORTAL_FREE(pdu, sizeof(*pdu));
-- }
--
-- PORTAL_MODULE_UNUSE;
-- RETURN(0);
--}
--
--static inline void freedata(void *data, int len)
--{
-- PORTAL_FREE(data, len);
- }
-
- static int
- kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid)
- {
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
- }
-
- static int
- kportal_del_route(int gw_nalid, ptl_nid_t gw_nid,
- ptl_nid_t lo, ptl_nid_t hi)
- {
- int rc;
- kpr_control_interface_t *ci;
-
- ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
-
- rc = ci->kprci_del_route (gw_nalid, gw_nid, lo, hi);
-
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
--}
--
- static int
- kportal_notify_router (int gw_nalid, ptl_nid_t gw_nid,
- int alive, time_t when)
-struct nal_cmd_handler *
-libcfs_find_nal_cmd_handler(int nal)
--{
- int rc;
- kpr_control_interface_t *ci;
-
- /* No error if router not preset. Sysadmin is allowed to notify
- * _everywhere_ when a NID boots or crashes, even if they know
- * nothing of the peer. */
- ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (0);
- int i;
--
- rc = ci->kprci_notify (gw_nalid, gw_nid, alive, when);
- for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
- if (nal_cmd[i].nch_handler != NULL &&
- nal_cmd[i].nch_number == nal)
- return (&nal_cmd[i]);
--
- PORTAL_SYMBOL_PUT(kpr_control_interface);
- return (rc);
- return (NULL);
--}
--
- static int
- kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp,
- ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp, int *alivep)
-int
-libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private)
--{
- int gateway_nalid;
- ptl_nid_t gateway_nid;
- ptl_nid_t lo_nid;
- ptl_nid_t hi_nid;
- int alive;
- int rc;
- kpr_control_interface_t *ci;
- struct nal_cmd_handler *cmd;
- int i;
- int rc;
--
- ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface);
- if (ci == NULL)
- return (-ENODEV);
- CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
--
- rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid,
- &lo_nid, &hi_nid, &alive);
- down(&nal_cmd_sem);
--
- if (rc == 0) {
- CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64", %s\n",
- index, gateway_nalid, gateway_nid, lo_nid, hi_nid,
- alive ? "up" : "down");
- if (libcfs_find_nal_cmd_handler(nal) != NULL) {
- up (&nal_cmd_sem);
- return (-EBUSY);
- }
--
- *gateway_nalidp = (__u32)gateway_nalid;
- *gateway_nidp = gateway_nid;
- *lo_nidp = lo_nid;
- *hi_nidp = hi_nid;
- *alivep = alive;
- cmd = NULL;
- for (i = 0; i < sizeof(nal_cmd)/sizeof(nal_cmd[0]); i++)
- if (nal_cmd[i].nch_handler == NULL) {
- cmd = &nal_cmd[i];
- break;
- }
-
- if (cmd == NULL) {
- rc = -EBUSY;
- } else {
- rc = 0;
- cmd->nch_number = nal;
- cmd->nch_handler = handler;
- cmd->nch_private = private;
-- }
--
- PORTAL_SYMBOL_PUT (kpr_control_interface);
- return (rc);
- up(&nal_cmd_sem);
-
- return rc;
--}
-EXPORT_SYMBOL(libcfs_nal_cmd_register);
--
- static int
- kportal_router_cmd(struct portals_cfg *pcfg, void * private)
-void
-libcfs_nal_cmd_unregister(int nal)
--{
- int err = -EINVAL;
- ENTRY;
-
- switch(pcfg->pcfg_command) {
- default:
- CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
- break;
-
- case NAL_CMD_ADD_ROUTE:
- CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kportal_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
- struct nal_cmd_handler *cmd;
--
- case NAL_CMD_DEL_ROUTE:
- CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kportal_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
- CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
--
- case NAL_CMD_NOTIFY_ROUTER: {
- CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags ? "Enabling" : "Disabling",
- (time_t)pcfg->pcfg_nid3);
-
- err = kportal_notify_router (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags,
- (time_t)pcfg->pcfg_nid3);
- break;
- }
-
- case NAL_CMD_GET_ROUTE:
- CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
- err = kportal_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
- &pcfg->pcfg_nid,
- &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
- &pcfg->pcfg_flags);
- break;
- }
- RETURN(err);
- down(&nal_cmd_sem);
- cmd = libcfs_find_nal_cmd_handler(nal);
- LASSERT (cmd != NULL);
- cmd->nch_handler = NULL;
- cmd->nch_private = NULL;
- up(&nal_cmd_sem);
--}
-EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
--
--int
- kportal_nal_cmd(struct portals_cfg *pcfg)
-libcfs_nal_cmd(struct portals_cfg *pcfg)
--{
- struct nal_cmd_handler *cmd;
-- __u32 nal = pcfg->pcfg_nal;
- int rc = -EINVAL;
-
- int rc = -EINVAL;
-- ENTRY;
--
-- down(&nal_cmd_sem);
- if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
- cmd = libcfs_find_nal_cmd_handler(nal);
- if (cmd != NULL) {
-- CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal,
-- pcfg->pcfg_command);
- rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
- } else {
- CERROR("invalid nal: %d, cmd: %d\n", nal, pcfg->pcfg_command);
- rc = cmd->nch_handler(pcfg, cmd->nch_private);
-- }
-- up(&nal_cmd_sem);
- RETURN(rc);
- }
-
- ptl_handle_ni_t *
- kportal_get_ni (int nal)
- {
--
- switch (nal)
- {
- case QSWNAL:
- return (PORTAL_SYMBOL_GET(kqswnal_ni));
- case SOCKNAL:
- return (PORTAL_SYMBOL_GET(ksocknal_ni));
- case GMNAL:
- return (PORTAL_SYMBOL_GET(kgmnal_ni));
- case IBNAL:
- return (PORTAL_SYMBOL_GET(kibnal_ni));
- case TCPNAL:
- /* userspace NAL */
- return (NULL);
- case SCIMACNAL:
- return (PORTAL_SYMBOL_GET(kscimacnal_ni));
- default:
- /* A warning to a naive caller */
- CERROR ("unknown nal: %d\n", nal);
- return (NULL);
- }
- RETURN(rc);
--}
-
- void
- kportal_put_ni (int nal)
- {
-EXPORT_SYMBOL(libcfs_nal_cmd);
--
- switch (nal)
- {
- case QSWNAL:
- PORTAL_SYMBOL_PUT(kqswnal_ni);
- break;
- case SOCKNAL:
- PORTAL_SYMBOL_PUT(ksocknal_ni);
- break;
- case GMNAL:
- PORTAL_SYMBOL_PUT(kgmnal_ni);
- break;
- case IBNAL:
- PORTAL_SYMBOL_PUT(kibnal_ni);
- break;
- case TCPNAL:
- /* A lesson to a malicious caller */
- LBUG ();
- case SCIMACNAL:
- PORTAL_SYMBOL_PUT(kscimacnal_ni);
- break;
- default:
- CERROR ("unknown nal: %d\n", nal);
- }
- }
-static DECLARE_RWSEM(ioctl_list_sem);
-static LIST_HEAD(ioctl_list);
--
- int
- kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private)
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
--{
-- int rc = 0;
-
- CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler);
- down_read(&ioctl_list_sem);
- if (!list_empty(&hand->item))
- rc = -EBUSY;
- up_read(&ioctl_list_sem);
--
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- if (nal_cmd[nal].nch_handler != NULL)
- rc = -EBUSY;
- else {
- nal_cmd[nal].nch_handler = handler;
- nal_cmd[nal].nch_private = private;
- }
- up(&nal_cmd_sem);
- if (rc == 0) {
- down_write(&ioctl_list_sem);
- list_add_tail(&hand->item, &ioctl_list);
- up_write(&ioctl_list_sem);
-- }
- return rc;
- RETURN(0);
--}
-EXPORT_SYMBOL(libcfs_register_ioctl);
--
- int
- kportal_nal_unregister(int nal)
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
--{
-- int rc = 0;
-
- CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal);
- down_read(&ioctl_list_sem);
- if (list_empty(&hand->item))
- rc = -ENOENT;
- up_read(&ioctl_list_sem);
--
- if (nal > 0 && nal <= NAL_MAX_NR) {
- down(&nal_cmd_sem);
- nal_cmd[nal].nch_handler = NULL;
- nal_cmd[nal].nch_private = NULL;
- up(&nal_cmd_sem);
- if (rc == 0) {
- down_write(&ioctl_list_sem);
- list_del_init(&hand->item);
- up_write(&ioctl_list_sem);
-- }
- return rc;
- RETURN(0);
--}
-
-EXPORT_SYMBOL(libcfs_deregister_ioctl);
--
- static int kportal_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-static int libcfs_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
--{
- int err = 0;
- int err = -EINVAL;
-- char buf[1024];
-- struct portal_ioctl_data *data;
- char str[PTL_NALFMT_SIZE];
-
-- ENTRY;
--
-- if (current->fsuid != 0)
-- RETURN(err = -EACCES);
--
-- if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
-- _IOC_NR(cmd) < IOC_PORTAL_MIN_NR ||
-- _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
-- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
-- RETURN(-EINVAL);
-- }
--
-- if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) {
-- CERROR("PORTALS ioctl: data error\n");
-- RETURN(-EINVAL);
-- }
--
-- data = (struct portal_ioctl_data *)buf;
--
-- switch (cmd) {
-- case IOC_PORTAL_CLEAR_DEBUG:
-- portals_debug_clear_buffer();
-- RETURN(0);
-- case IOC_PORTAL_PANIC:
-- if (!capable (CAP_SYS_BOOT))
-- RETURN (-EPERM);
-- panic("debugctl-invoked panic");
-- RETURN(0);
-- case IOC_PORTAL_MARK_DEBUG:
-- if (data->ioc_inlbuf1 == NULL ||
-- data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
-- RETURN(-EINVAL);
-- portals_debug_mark_buffer(data->ioc_inlbuf1);
- RETURN(0);
- case IOC_PORTAL_PING: {
- void (*ping)(struct portal_ioctl_data *);
-
- CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
- data->ioc_count, data->ioc_nid,
- portals_nid2str(data->ioc_nal, data->ioc_nid, str));
- ping = PORTAL_SYMBOL_GET(kping_client);
- if (!ping)
- CERROR("PORTAL_SYMBOL_GET failed\n");
- else {
- ping(data);
- PORTAL_SYMBOL_PUT(kping_client);
- }
-- RETURN(0);
- }
-
- case IOC_PORTAL_GET_NID: {
- const ptl_handle_ni_t *nip;
- ptl_process_id_t pid;
-
- CDEBUG (D_IOCTL, "Getting nid for nal [%d]\n", data->ioc_nal);
-
- nip = kportal_get_ni (data->ioc_nal);
- if (nip == NULL)
- RETURN (-EINVAL);
-
- err = PtlGetId (*nip, &pid);
- LASSERT (err == PTL_OK);
- kportal_put_ni (data->ioc_nal);
-
- data->ioc_nid = pid.nid;
- if (copy_to_user ((char *)arg, data, sizeof (*data)))
- err = -EFAULT;
- break;
- }
-
- case IOC_PORTAL_NAL_CMD: {
- struct portals_cfg pcfg;
-
- LASSERT (data->ioc_plen1 == sizeof(pcfg));
- err = copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
- sizeof(pcfg));
- if ( err ) {
- EXIT;
- return err;
- }
-
- CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
- pcfg.pcfg_command);
- err = kportal_nal_cmd(&pcfg);
- if (err == 0) {
- if (copy_to_user((char *)data->ioc_pbuf1, &pcfg,
- sizeof (pcfg)))
- err = -EFAULT;
- if (copy_to_user((char *)arg, data, sizeof (*data)))
- err = -EFAULT;
- }
- break;
- }
- case IOC_PORTAL_FAIL_NID: {
- const ptl_handle_ni_t *nip;
-
- CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
- data->ioc_nal, data->ioc_nid, data->ioc_count);
-
- nip = kportal_get_ni (data->ioc_nal);
- if (nip == NULL)
- return (-EINVAL);
-
- err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
- kportal_put_ni (data->ioc_nal);
- break;
- }
--#if LWT_SUPPORT
-- case IOC_PORTAL_LWT_CONTROL:
-- err = lwt_control (data->ioc_flags, data->ioc_misc);
-- break;
--
- case IOC_PORTAL_LWT_SNAPSHOT:
- err = lwt_snapshot (&data->ioc_nid,
- &data->ioc_count, &data->ioc_misc,
- case IOC_PORTAL_LWT_SNAPSHOT: {
- cycles_t now;
- int ncpu;
- int total_size;
-
- err = lwt_snapshot (&now, &ncpu, &total_size,
-- data->ioc_pbuf1, data->ioc_plen1);
- data->ioc_nid = now;
- data->ioc_count = ncpu;
- data->ioc_misc = total_size;
-
- /* Hedge against broken user/kernel typedefs (e.g. cycles_t) */
- data->ioc_nid = sizeof(lwt_event_t);
- data->ioc_nid2 = offsetof(lwt_event_t, lwte_where);
-
-- if (err == 0 &&
-- copy_to_user((char *)arg, data, sizeof (*data)))
-- err = -EFAULT;
-- break;
- }
--
-- case IOC_PORTAL_LWT_LOOKUP_STRING:
-- err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1,
-- data->ioc_pbuf2, data->ioc_plen2);
-- if (err == 0 &&
-- copy_to_user((char *)arg, data, sizeof (*data)))
-- err = -EFAULT;
-- break;
--#endif
- case IOC_PORTAL_NAL_CMD: {
- struct portals_cfg pcfg;
-
- if (data->ioc_plen1 != sizeof(pcfg)) {
- CERROR("Bad ioc_plen1 %d (wanted %d)\n",
- data->ioc_plen1, sizeof(pcfg));
- err = -EINVAL;
- break;
- }
-
- if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
- sizeof(pcfg))) {
- err = -EFAULT;
- break;
- }
-
- CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", pcfg.pcfg_nal,
- pcfg.pcfg_command);
- err = libcfs_nal_cmd(&pcfg);
-
- if (err == 0 &&
- copy_to_user((char *)data->ioc_pbuf1, &pcfg,
- sizeof (pcfg)))
- err = -EFAULT;
- break;
- }
-
-- case IOC_PORTAL_MEMHOG:
-- if (!capable (CAP_SYS_ADMIN))
-- err = -EPERM;
-- else if (file->private_data == NULL) {
-- err = -EINVAL;
-- } else {
-- kportal_memhog_free(file->private_data);
-- err = kportal_memhog_alloc(file->private_data,
-- data->ioc_count,
-- data->ioc_flags);
-- if (err != 0)
-- kportal_memhog_free(file->private_data);
-- }
-- break;
--
- default:
- default: {
- struct libcfs_ioctl_handler *hand;
-- err = -EINVAL;
- break;
- down_read(&ioctl_list_sem);
- list_for_each_entry(hand, &ioctl_list, item) {
- err = hand->handle_ioctl(data, cmd, arg);
- if (err != -EINVAL)
- break;
- }
- up_read(&ioctl_list_sem);
- } break;
-- }
--
-- RETURN(err);
--}
--
--
- static struct file_operations portalsdev_fops = {
- ioctl: kportal_ioctl,
- open: kportal_psdev_open,
- release: kportal_psdev_release
-static struct file_operations libcfs_fops = {
- ioctl: libcfs_ioctl,
- open: libcfs_psdev_open,
- release: libcfs_psdev_release
--};
--
--
- static struct miscdevice portal_dev = {
-static struct miscdevice libcfs_dev = {
-- PORTAL_MINOR,
-- "portals",
- &portalsdev_fops
- &libcfs_fops
--};
--
--extern int insert_proc(void);
--extern void remove_proc(void);
--MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
--MODULE_DESCRIPTION("Portals v3.1");
--MODULE_LICENSE("GPL");
--
- static int init_kportals_module(void)
-static int init_libcfs_module(void)
--{
-- int rc;
--
-- rc = portals_debug_init(5 * 1024 * 1024);
-- if (rc < 0) {
-- printk(KERN_ERR "LustreError: portals_debug_init: %d\n", rc);
-- return (rc);
-- }
--
--#if LWT_SUPPORT
-- rc = lwt_init();
-- if (rc != 0) {
-- CERROR("lwt_init: error %d\n", rc);
-- goto cleanup_debug;
-- }
--#endif
- sema_init(&nal_cmd_sem, 1);
-
- rc = misc_register(&portal_dev);
- rc = misc_register(&libcfs_dev);
-- if (rc) {
-- CERROR("misc_register: error %d\n", rc);
-- goto cleanup_lwt;
- }
-
- rc = PtlInit();
- if (rc) {
- CERROR("PtlInit: error %d\n", rc);
- goto cleanup_deregister;
-- }
--
-- rc = insert_proc();
-- if (rc) {
-- CERROR("insert_proc: error %d\n", rc);
- goto cleanup_fini;
- }
-
- rc = kportal_nal_register(ROUTER, kportal_router_cmd, NULL);
- if (rc) {
- CERROR("kportal_nal_registre: ROUTER error %d\n", rc);
- goto cleanup_proc;
- goto cleanup_deregister;
-- }
--
-- CDEBUG (D_OTHER, "portals setup OK\n");
-- return (0);
--
- cleanup_proc:
- remove_proc();
- cleanup_fini:
- PtlFini();
-- cleanup_deregister:
- misc_deregister(&portal_dev);
- misc_deregister(&libcfs_dev);
-- cleanup_lwt:
--#if LWT_SUPPORT
-- lwt_fini();
-- cleanup_debug:
--#endif
-- portals_debug_cleanup();
-- return rc;
--}
--
- static void exit_kportals_module(void)
-static void exit_libcfs_module(void)
--{
-- int rc;
--
- kportal_nal_unregister(ROUTER);
-- remove_proc();
- PtlFini();
--
-- CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
-- atomic_read(&portal_kmemory));
--
- rc = misc_deregister(&portal_dev);
- rc = misc_deregister(&libcfs_dev);
-- if (rc)
-- CERROR("misc_deregister error %d\n", rc);
--
--#if LWT_SUPPORT
-- lwt_fini();
--#endif
--
-- if (atomic_read(&portal_kmemory) != 0)
-- CERROR("Portals memory leaked: %d bytes\n",
-- atomic_read(&portal_kmemory));
--
-- rc = portals_debug_cleanup();
-- if (rc)
-- printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
--}
--
- EXPORT_SYMBOL(lib_dispatch);
- EXPORT_SYMBOL(PtlMEAttach);
- EXPORT_SYMBOL(PtlMEInsert);
- EXPORT_SYMBOL(PtlMEUnlink);
- EXPORT_SYMBOL(PtlEQAlloc);
- EXPORT_SYMBOL(PtlMDAttach);
- EXPORT_SYMBOL(PtlMDUnlink);
- EXPORT_SYMBOL(PtlNIInit);
- EXPORT_SYMBOL(PtlNIFini);
- EXPORT_SYMBOL(PtlNIDebug);
- EXPORT_SYMBOL(PtlInit);
- EXPORT_SYMBOL(PtlFini);
- EXPORT_SYMBOL(PtlPut);
- EXPORT_SYMBOL(PtlGet);
- EXPORT_SYMBOL(ptl_err_str);
- EXPORT_SYMBOL(PtlEQWait);
- EXPORT_SYMBOL(PtlEQFree);
- EXPORT_SYMBOL(PtlEQGet);
- EXPORT_SYMBOL(PtlGetId);
- EXPORT_SYMBOL(PtlMDBind);
- EXPORT_SYMBOL(lib_iov_nob);
- EXPORT_SYMBOL(lib_copy_iov2buf);
- EXPORT_SYMBOL(lib_copy_buf2iov);
- EXPORT_SYMBOL(lib_extract_iov);
- EXPORT_SYMBOL(lib_kiov_nob);
- EXPORT_SYMBOL(lib_copy_kiov2buf);
- EXPORT_SYMBOL(lib_copy_buf2kiov);
- EXPORT_SYMBOL(lib_extract_kiov);
- EXPORT_SYMBOL(lib_finalize);
- EXPORT_SYMBOL(lib_parse);
- EXPORT_SYMBOL(lib_fake_reply_msg);
- EXPORT_SYMBOL(lib_init);
- EXPORT_SYMBOL(lib_fini);
- EXPORT_SYMBOL(dispatch_name);
--EXPORT_SYMBOL(kportal_daemonize);
--EXPORT_SYMBOL(kportal_blockallsigs);
- EXPORT_SYMBOL(kportal_nal_register);
- EXPORT_SYMBOL(kportal_nal_unregister);
--EXPORT_SYMBOL(kportal_assertion_failed);
- EXPORT_SYMBOL(kportal_get_ni);
- EXPORT_SYMBOL(kportal_put_ni);
- EXPORT_SYMBOL(kportal_nal_cmd);
--
- module_init(init_kportals_module);
- module_exit (exit_kportals_module);
-module_init(init_libcfs_module);
-module_exit(exit_libcfs_module);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- * Author: Zach Brown <zab@zabbo.net>
-- * Author: Peter J. Braam <braam@clusterfs.com>
-- * Author: Phil Schwan <phil@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include <linux/config.h>
--#include <linux/module.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/stat.h>
--#include <linux/errno.h>
--#include <linux/smp_lock.h>
--#include <linux/unistd.h>
--#include <net/sock.h>
--#include <linux/uio.h>
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--
--#include <linux/fs.h>
--#include <linux/file.h>
--#include <linux/stat.h>
--#include <linux/list.h>
--#include <asm/uaccess.h>
--#include <asm/segment.h>
--
--#include <linux/proc_fs.h>
--#include <linux/sysctl.h>
--
--# define DEBUG_SUBSYSTEM S_PORTALS
--
--#include <linux/kp30.h>
--#include <asm/div64.h>
--#include "tracefile.h"
--
--static struct ctl_table_header *portals_table_header = NULL;
--extern char debug_file_path[1024];
--extern char portals_upcall[1024];
--
--#define PSDEV_PORTALS (0x100)
- #define PSDEV_DEBUG 1 /* control debugging */
- #define PSDEV_SUBSYSTEM_DEBUG 2 /* control debugging */
- #define PSDEV_PRINTK 3 /* force all errors to console */
- #define PSDEV_CONSOLE 4 /* allow _any_ messages to console */
- #define PSDEV_DEBUG_PATH 5 /* crashdump log location */
- #define PSDEV_DEBUG_DUMP_PATH 6 /* crashdump tracelog location */
- #define PSDEV_PORTALS_UPCALL 7 /* User mode upcall script */
-enum {
- PSDEV_DEBUG = 1, /* control debugging */
- PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
- PSDEV_PRINTK, /* force all errors to console */
- PSDEV_CONSOLE, /* allow _any_ messages to console */
- PSDEV_DEBUG_PATH, /* crashdump log location */
- PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
- PSDEV_PORTALS_UPCALL, /* User mode upcall script */
- PSDEV_PORTALS_MEMUSED, /* bytes currently PORTAL_ALLOCated */
-};
--
- #define PORTALS_PRIMARY_CTLCNT 7
- static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = {
-static struct ctl_table portals_table[] = {
-- {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL,
-- &proc_dointvec},
-- {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug,
-- sizeof(int), 0644, NULL, &proc_dointvec},
-- {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL,
-- &proc_dointvec},
-- {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
-- sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
-- {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
-- sizeof(portals_upcall), 0644, NULL, &proc_dostring,
-- &sysctl_string},
- {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter,
- sizeof(int), 0644, NULL, &proc_dointvec},
-- {0}
--};
--
--static struct ctl_table top_table[2] = {
-- {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table},
-- {0}
--};
--
--
--#ifdef PORTALS_PROFILING
--/*
-- * profiling stuff. we do this statically for now 'cause its simple,
-- * but we could do some tricks with elf sections to have this array
-- * automatically built.
-- */
--#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, }
--
--struct prof_ent prof_ents[] = {
-- def_prof(our_recvmsg),
-- def_prof(our_sendmsg),
-- def_prof(socknal_recv),
-- def_prof(lib_parse),
-- def_prof(conn_list_walk),
-- def_prof(memcpy),
-- def_prof(lib_finalize),
-- def_prof(pingcli_time),
-- def_prof(gmnal_send),
-- def_prof(gmnal_recv),
--};
--
--EXPORT_SYMBOL(prof_ents);
--
--/*
-- * this function is as crazy as the proc filling api
-- * requires.
-- *
-- * buffer: page allocated for us to scribble in. the
-- * data returned to the user will be taken from here.
-- * *start: address of the pointer that will tell the
-- * caller where in buffer the data the user wants is.
-- * ppos: offset in the entire /proc file that the user
-- * currently wants.
-- * wanted: the amount of data the user wants.
-- *
-- * while going, 'curpos' is the offset in the entire
-- * file where we currently are. We only actually
-- * start filling buffer when we get to a place in
-- * the file that the user cares about.
-- *
-- * we take care to only sprintf when the user cares because
-- * we're holding a lock while we do this.
-- *
-- * we're smart and know that we generate fixed size lines.
-- * we only start writing to the buffer when the user cares.
-- * This is unpredictable because we don't snapshot the
-- * list between calls that are filling in a file from
-- * the list. The list could change mid read and the
-- * output will look very weird indeed. oh well.
-- */
--
--static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted,
-- int *eof, void *data)
--{
-- int len = 0, i;
-- int curpos;
-- char *header = "Interval Cycles_per (Starts Finishes Total)\n";
-- int header_len = strlen(header);
-- char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)";
-- int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1);
--
-- *start = buffer;
--
-- if (ppos < header_len) {
-- int diff = MIN(header_len, wanted);
-- memcpy(buffer, header + ppos, diff);
-- len += diff;
-- ppos += diff;
-- }
--
-- if (len >= wanted)
-- goto out;
--
-- curpos = header_len;
--
-- for ( i = 0; i < MAX_PROFS ; i++) {
-- int copied;
-- struct prof_ent *pe = &prof_ents[i];
-- long long cycles_per;
-- /*
-- * find the part of the array that the buffer wants
-- */
-- if (ppos >= (curpos + line_len)) {
-- curpos += line_len;
-- continue;
-- }
-- /* the clever caller split a line */
-- if (ppos > curpos) {
-- *start = buffer + (ppos - curpos);
-- }
--
-- if (pe->finishes == 0)
-- cycles_per = 0;
-- else
-- {
-- cycles_per = pe->total_cycles;
-- do_div (cycles_per, pe->finishes);
-- }
--
-- copied = sprintf(buffer + len, format, pe->str, cycles_per,
-- pe->starts, pe->finishes, pe->total_cycles);
--
-- len += copied;
--
-- /* pad to line len, -1 for \n */
-- if ((copied < line_len-1)) {
-- int diff = (line_len-1) - copied;
-- memset(buffer + len, ' ', diff);
-- len += diff;
-- copied += diff;
-- }
--
-- buffer[len++]= '\n';
--
-- /* bail if we have enough */
-- if (((buffer + len) - *start) >= wanted)
-- break;
--
-- curpos += line_len;
-- }
--
-- /* lameness */
-- if (i == MAX_PROFS)
-- *eof = 1;
-- out:
--
-- return MIN(((buffer + len) - *start), wanted);
--}
--
--/*
-- * all kids love /proc :/
-- */
--static unsigned char basedir[]="net/portals";
--#endif /* PORTALS_PROFILING */
--
--int insert_proc(void)
--{
-- struct proc_dir_entry *ent;
--#if PORTALS_PROFILING
-- unsigned char dir[128];
--
-- if (ARRAY_SIZE(prof_ents) != MAX_PROFS) {
-- CERROR("profiling enum and array are out of sync.\n");
-- return -1;
-- }
--
-- /*
-- * This is pretty lame. assuming that failure just
-- * means that they already existed.
-- */
-- strcat(dir, basedir);
-- create_proc_entry(dir, S_IFDIR, 0);
--
-- strcat(dir, "/cycles");
-- ent = create_proc_entry(dir, 0, 0);
-- if (!ent) {
-- CERROR("couldn't register %s?\n", dir);
-- return -1;
-- }
--
-- ent->data = NULL;
-- ent->read_proc = prof_read_proc;
--#endif /* PORTALS_PROFILING */
--
--#ifdef CONFIG_SYSCTL
-- if (!portals_table_header)
-- portals_table_header = register_sysctl_table(top_table, 0);
--#endif
--
-- ent = create_proc_entry("sys/portals/dump_kernel", 0, NULL);
-- if (ent == NULL) {
-- CERROR("couldn't register dump_kernel\n");
-- return -1;
-- }
-- ent->write_proc = trace_dk;
--
-- ent = create_proc_entry("sys/portals/daemon_file", 0, NULL);
-- if (ent == NULL) {
-- CERROR("couldn't register daemon_file\n");
-- return -1;
-- }
-- ent->write_proc = trace_write_daemon_file;
-- ent->read_proc = trace_read_daemon_file;
--
- ent = create_proc_entry("sys/portals/debug_mb", 0, NULL);
- ent = create_proc_entry("sys/portals/debug_size", 0, NULL);
-- if (ent == NULL) {
- CERROR("couldn't register debug_mb\n");
- CERROR("couldn't register debug_size\n");
-- return -1;
-- }
- ent->write_proc = trace_write_debug_mb;
- ent->read_proc = trace_read_debug_mb;
- ent->write_proc = trace_write_debug_size;
- ent->read_proc = trace_read_debug_size;
--
-- return 0;
--}
--
--void remove_proc(void)
--{
--#if PORTALS_PROFILING
-- unsigned char dir[128];
-- int end;
--
-- dir[0]='\0';
-- strcat(dir, basedir);
--
-- end = strlen(dir);
--
-- strcat(dir, "/cycles");
-- remove_proc_entry(dir, 0);
--
-- dir[end] = '\0';
-- remove_proc_entry(dir, 0);
--#endif /* PORTALS_PROFILING */
--
-- remove_proc_entry("sys/portals/dump_kernel", NULL);
-- remove_proc_entry("sys/portals/daemon_file", NULL);
- remove_proc_entry("sys/portals/debug_mb", NULL);
- remove_proc_entry("sys/portals/debug_size", NULL);
--
--#ifdef CONFIG_SYSCTL
-- if (portals_table_header)
-- unregister_sysctl_table(portals_table_header);
-- portals_table_header = NULL;
--#endif
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2004 Cluster File Systems, Inc.
-- * Author: Zach Brown <zab@clusterfs.com>
-- * Author: Phil Schwan <phil@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include <linux/kernel.h>
--#include <linux/module.h>
--#include <linux/init.h>
--#include <linux/rwsem.h>
--#include <linux/proc_fs.h>
--#include <linux/file.h>
--#include <linux/smp.h>
--#include <linux/ctype.h>
--#include <asm/uaccess.h>
--#ifdef HAVE_MM_INLINE
--#include <linux/mm_inline.h>
--#endif
--
--#define DEBUG_SUBSYSTEM S_PORTALS
--
--#include <linux/kp30.h>
--#include <linux/portals_compat25.h>
-#include <linux/lustre_compat25.h>
--#include <linux/libcfs.h>
--
- #define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-#define TCD_MAX_PAGES 1280
--
--/* XXX move things up to the top, comment */
--
--static union {
-- struct trace_cpu_data {
-- struct list_head tcd_pages;
-- unsigned long tcd_cur_pages;
--
-- struct list_head tcd_daemon_pages;
-- unsigned long tcd_cur_daemon_pages;
--
-- unsigned long tcd_max_pages;
-- int tcd_shutting_down;
-- } tcd;
-- char __pad[SMP_CACHE_BYTES];
--} trace_data[NR_CPUS] __cacheline_aligned;
--
--struct page_collection {
-- struct list_head pc_pages;
-- spinlock_t pc_lock;
-- int pc_want_daemon_pages;
--};
--
--struct tracefiled_ctl {
-- struct completion tctl_start;
-- struct completion tctl_stop;
-- wait_queue_head_t tctl_waitq;
-- pid_t tctl_pid;
-- atomic_t tctl_shutdown;
--};
--
- #define TRACEFILE_SIZE (500 << 20)
--static DECLARE_RWSEM(tracefile_sem);
--static char *tracefile = NULL;
- static long long tracefile_size = TRACEFILE_SIZE;
--static struct tracefiled_ctl trace_tctl;
--static DECLARE_MUTEX(trace_thread_sem);
--static int thread_running = 0;
--
--#ifndef get_cpu
--#define get_cpu() smp_processor_id()
--#define put_cpu() do { } while (0)
--#endif
--
--#define trace_get_tcd(FLAGS) ({ \
-- struct trace_cpu_data *__ret; \
-- int __cpu = get_cpu(); \
-- local_irq_save(FLAGS); \
-- __ret = &trace_data[__cpu].tcd; \
-- __ret; \
--})
--
--#define trace_put_tcd(TCD, FLAGS) do { \
-- local_irq_restore(FLAGS); \
-- put_cpu(); \
--} while (0)
--
--static void put_pages_on_daemon_list_on_cpu(void *info);
--
--/* return a page that has 'len' bytes left at the end */
--static struct page *trace_get_page(struct trace_cpu_data *tcd,
-- unsigned long len)
--{
-- struct page *page = NULL;
--
-- if (len > PAGE_SIZE) {
-- printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
-- "page\n", len);
-- return NULL;
-- }
--
-- if (!list_empty(&tcd->tcd_pages)) {
-- page = list_entry(tcd->tcd_pages.prev, struct page,
-- PAGE_LIST_ENTRY);
-- if (page->index + len <= PAGE_SIZE)
-- return page;
-- }
--
-- if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
-- page = alloc_page(GFP_ATOMIC);
-- if (page == NULL) {
-- /* the kernel should print a message for us. fall back
-- * to using the last page in the ring buffer. */
-- goto ring_buffer;
- return NULL;
-- }
-- page->index = 0;
-- page->mapping = (void *)(long)smp_processor_id();
-- list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
-- tcd->tcd_cur_pages++;
--
-- if (tcd->tcd_cur_pages > 8 && thread_running) {
-- struct tracefiled_ctl *tctl = &trace_tctl;
-- wake_up(&tctl->tctl_waitq);
-- }
-- return page;
-- }
--
-- ring_buffer:
-- if (thread_running) {
-- int pgcount = tcd->tcd_cur_pages / 10;
-- struct page_collection pc;
-- struct list_head *pos, *tmp;
-- printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
-- " 10%% of pages (%d)\n", pgcount + 1);
--
-- INIT_LIST_HEAD(&pc.pc_pages);
-- spin_lock_init(&pc.pc_lock);
--
-- list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
-- struct page *page;
--
-- if (pgcount-- == 0)
-- break;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- list_del(&PAGE_LIST(page));
-- list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
-- tcd->tcd_cur_pages--;
-- }
-- put_pages_on_daemon_list_on_cpu(&pc);
-- }
-- LASSERT(!list_empty(&tcd->tcd_pages));
--
-- page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
-- page->index = 0;
--
-- list_del(&PAGE_LIST(page));
-- list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
-- return page;
--}
--
--static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
-- int len, char *file, const char *fn)
--{
-- char *prefix = NULL, *ptype = NULL;
--
-- if ((mask & D_EMERG) != 0) {
-- prefix = "LustreError";
-- ptype = KERN_EMERG;
-- } else if ((mask & D_ERROR) != 0) {
-- prefix = "LustreError";
-- ptype = KERN_ERR;
-- } else if ((mask & D_WARNING) != 0) {
-- prefix = "Lustre";
-- ptype = KERN_WARNING;
-- } else if (portal_printk) {
-- prefix = "Lustre";
-- ptype = KERN_INFO;
-- }
-
-
-- printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
-- hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
--}
--
--void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-- const int line, unsigned long stack, char *format, ...)
--{
-- struct trace_cpu_data *tcd;
-- struct ptldebug_header header;
-- struct page *page;
- char *debug_buf = format;
- int known_size, needed = 85 /* average message length */, max_nob;
- char *debug_buf;
- int known_size, needed, max_nob;
-- va_list ap;
-- unsigned long flags;
-- struct timeval tv;
--
-- if (*(format + strlen(format) - 1) != '\n')
-- printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
-- file, line, fn);
--
-- tcd = trace_get_tcd(flags);
-- if (tcd->tcd_shutting_down)
-- goto out;
--
-- do_gettimeofday(&tv);
--
-- header.ph_subsys = subsys;
-- header.ph_mask = mask;
-- header.ph_cpu_id = smp_processor_id();
-- header.ph_sec = (__u32)tv.tv_sec;
-- header.ph_usec = tv.tv_usec;
-- header.ph_stack = stack;
-- header.ph_pid = current->pid;
-- header.ph_line_num = line;
--
--#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-- header.ph_extern_pid = current->thread.extern_pid;
--#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-- header.ph_extern_pid = current->thread.mode.tt.extern_pid;
--#else
-- header.ph_extern_pid = 0;
--#endif
--
-- known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
--
- page = trace_get_page(tcd, known_size + 40); /* slop */
-- retry:
- page = trace_get_page(tcd, needed + known_size);
- if (page == NULL) {
- debug_buf = format;
- if (needed + known_size > PAGE_SIZE)
- mask |= D_ERROR;
- needed = strlen(format);
- if (page == NULL)
-- goto out;
- }
--
-- debug_buf = page_address(page) + page->index + known_size;
--
- va_start(ap, format);
-- max_nob = PAGE_SIZE - page->index - known_size;
-- LASSERT(max_nob > 0);
- va_start(ap, format);
-- needed = vsnprintf(debug_buf, max_nob, format, ap);
-- va_end(ap);
--
- if (needed > max_nob) /* overflow. oh poop. */
- if (needed > max_nob) {
- /* overflow. oh poop. */
- page = trace_get_page(tcd, needed + known_size);
-- goto retry;
- }
--
-- header.ph_len = known_size + needed;
-- debug_buf = page_address(page) + page->index;
--
-- memcpy(debug_buf, &header, sizeof(header));
-- page->index += sizeof(header);
-- debug_buf += sizeof(header);
--
-- strcpy(debug_buf, file);
-- page->index += strlen(file) + 1;
-- debug_buf += strlen(file) + 1;
--
-- strcpy(debug_buf, fn);
-- page->index += strlen(fn) + 1;
-- debug_buf += strlen(fn) + 1;
--
-- page->index += needed;
-- if (page->index > PAGE_SIZE)
-- printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
-- page->index);
--
- out:
-- if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
-- print_to_console(&header, mask, debug_buf, needed, file, fn);
--
- out:
-- trace_put_tcd(tcd, flags);
--}
--EXPORT_SYMBOL(portals_debug_msg);
--
--static void collect_pages_on_cpu(void *info)
--{
-- struct trace_cpu_data *tcd;
-- unsigned long flags;
-- struct page_collection *pc = info;
--
-- tcd = trace_get_tcd(flags);
--
-- spin_lock(&pc->pc_lock);
-- list_splice(&tcd->tcd_pages, &pc->pc_pages);
-- INIT_LIST_HEAD(&tcd->tcd_pages);
-- tcd->tcd_cur_pages = 0;
-- if (pc->pc_want_daemon_pages) {
-- list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
-- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
-- tcd->tcd_cur_daemon_pages = 0;
-- }
-- spin_unlock(&pc->pc_lock);
--
-- trace_put_tcd(tcd, flags);
--}
--
--static void collect_pages(struct page_collection *pc)
--{
-- /* needs to be fixed up for preempt */
-- INIT_LIST_HEAD(&pc->pc_pages);
-- collect_pages_on_cpu(pc);
-- smp_call_function(collect_pages_on_cpu, pc, 0, 1);
--}
--
--static void put_pages_back_on_cpu(void *info)
--{
-- struct page_collection *pc = info;
-- struct trace_cpu_data *tcd;
-- struct list_head *pos, *tmp, *cur_head;
-- unsigned long flags;
--
-- tcd = trace_get_tcd(flags);
--
-- cur_head = tcd->tcd_pages.next;
--
-- spin_lock(&pc->pc_lock);
-- list_for_each_safe(pos, tmp, &pc->pc_pages) {
-- struct page *page;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- if ((unsigned long)page->mapping != smp_processor_id())
-- continue;
--
-- list_del(&PAGE_LIST(page));
-- list_add_tail(&PAGE_LIST(page), cur_head);
-- tcd->tcd_cur_pages++;
-- }
-- spin_unlock(&pc->pc_lock);
--
-- trace_put_tcd(tcd, flags);
--}
--
--static void put_pages_back(struct page_collection *pc)
--{
-- /* needs to be fixed up for preempt */
-- put_pages_back_on_cpu(pc);
-- smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
--}
--
--/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
-- * we have a good amount of data at all times for dumping during an LBUG, even
-- * if we have been steadily writing (and otherwise discarding) pages via the
-- * debug daemon. */
--static void put_pages_on_daemon_list_on_cpu(void *info)
--{
-- struct page_collection *pc = info;
-- struct trace_cpu_data *tcd;
-- struct list_head *pos, *tmp;
-- unsigned long flags;
--
-- tcd = trace_get_tcd(flags);
--
-- spin_lock(&pc->pc_lock);
-- list_for_each_safe(pos, tmp, &pc->pc_pages) {
-- struct page *page;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
-- if ((unsigned long)page->mapping != smp_processor_id())
-- continue;
--
-- list_del(&PAGE_LIST(page));
-- list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
-- tcd->tcd_cur_daemon_pages++;
--
-- if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
-- LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-- page = list_entry(tcd->tcd_daemon_pages.next,
-- struct page, PAGE_LIST_ENTRY);
--
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- page->index = 0;
-- list_del(&PAGE_LIST(page));
-- page->mapping = NULL;
-- __free_page(page);
-- tcd->tcd_cur_daemon_pages--;
-- }
-- }
-- spin_unlock(&pc->pc_lock);
--
-- trace_put_tcd(tcd, flags);
--}
--
--static void put_pages_on_daemon_list(struct page_collection *pc)
--{
-- put_pages_on_daemon_list_on_cpu(pc);
-- smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
--}
--
--void trace_debug_print(void)
--{
-- struct page_collection pc;
-- struct list_head *pos, *tmp;
--
-- spin_lock_init(&pc.pc_lock);
--
-- collect_pages(&pc);
-- list_for_each_safe(pos, tmp, &pc.pc_pages) {
-- struct page *page;
-- char *p, *file, *fn;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- p = page_address(page);
-- while (p < ((char *)page_address(page) + PAGE_SIZE)) {
-- struct ptldebug_header *hdr;
-- int len;
-- hdr = (void *)p;
-- p += sizeof(*hdr);
-- file = p;
-- p += strlen(file) + 1;
-- fn = p;
-- p += strlen(fn) + 1;
-- len = hdr->ph_len - (p - (char *)hdr);
--
-- print_to_console(hdr, D_EMERG, p, len, file, fn);
-- }
--
-- list_del(&PAGE_LIST(page));
-- page->mapping = NULL;
-- __free_page(page);
-- }
--}
--
--int tracefile_dump_all_pages(char *filename)
--{
-- struct page_collection pc;
-- struct file *filp;
-- struct list_head *pos, *tmp;
-- mm_segment_t oldfs;
-- int rc;
--
-- down_write(&tracefile_sem);
--
- filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
- filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600);
-- if (IS_ERR(filp)) {
-- rc = PTR_ERR(filp);
-- printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
- filename, rc);
- filename, rc);
-- goto out;
-- }
--
-- spin_lock_init(&pc.pc_lock);
-- pc.pc_want_daemon_pages = 1;
-- collect_pages(&pc);
-- if (list_empty(&pc.pc_pages)) {
-- rc = 0;
-- goto close;
-- }
--
-- /* ok, for now, just write the pages. in the future we'll be building
-- * iobufs with the pages and calling generic_direct_IO */
-- oldfs = get_fs();
-- set_fs(get_ds());
-- list_for_each_safe(pos, tmp, &pc.pc_pages) {
-- struct page *page;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- rc = filp->f_op->write(filp, page_address(page), page->index,
-- &filp->f_pos);
-- if (rc != page->index) {
-- printk(KERN_WARNING "wanted to write %lu but wrote "
-- "%d\n", page->index, rc);
-- put_pages_back(&pc);
-- break;
-- }
-- list_del(&PAGE_LIST(page));
-- page->mapping = NULL;
-- __free_page(page);
-- }
-- set_fs(oldfs);
-- rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
-- if (rc)
-- printk(KERN_ERR "sync returns %d\n", rc);
-- close:
-- filp_close(filp, 0);
-- out:
-- up_write(&tracefile_sem);
-- return rc;
--}
--
--void trace_flush_pages(void)
--{
-- struct page_collection pc;
-- struct list_head *pos, *tmp;
--
-- spin_lock_init(&pc.pc_lock);
--
-- collect_pages(&pc);
-- list_for_each_safe(pos, tmp, &pc.pc_pages) {
-- struct page *page;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- list_del(&PAGE_LIST(page));
-- page->mapping = NULL;
-- __free_page(page);
-- }
--}
--
--int trace_dk(struct file *file, const char *buffer, unsigned long count,
-- void *data)
--{
-- char *name;
-- unsigned long off;
-- int rc;
--
-- name = kmalloc(count + 1, GFP_KERNEL);
-- if (name == NULL)
-- return -ENOMEM;
--
-- if (copy_from_user(name, buffer, count)) {
-- rc = -EFAULT;
-- goto out;
-- }
--
-- if (name[0] != '/') {
-- rc = -EINVAL;
-- goto out;
-- }
--
-- /* be nice and strip out trailing '\n' */
-- for (off = count ; off > 2 && isspace(name[off - 1]); off--)
-- ;
--
-- name[off] = '\0';
-- rc = tracefile_dump_all_pages(name);
--out:
-- if (name)
-- kfree(name);
-- return count;
--}
--EXPORT_SYMBOL(trace_dk);
--
--static int tracefiled(void *arg)
--{
-- struct page_collection pc;
-- struct tracefiled_ctl *tctl = arg;
-- struct list_head *pos, *tmp;
-- struct ptldebug_header *hdr;
-- struct file *filp;
-- struct page *page;
-- mm_segment_t oldfs;
-- int rc;
--
-- /* we're started late enough that we pick up init's fs context */
-- /* this is so broken in uml? what on earth is going on? */
-- kportal_daemonize("ktracefiled");
-- reparent_to_init();
--
-- spin_lock_init(&pc.pc_lock);
-- complete(&tctl->tctl_start);
--
-- while (1) {
-- wait_queue_t __wait;
--
-- init_waitqueue_entry(&__wait, current);
-- add_wait_queue(&tctl->tctl_waitq, &__wait);
-- set_current_state(TASK_INTERRUPTIBLE);
-- schedule_timeout(HZ);
-- remove_wait_queue(&tctl->tctl_waitq, &__wait);
--
-- if (atomic_read(&tctl->tctl_shutdown))
-- break;
--
-- pc.pc_want_daemon_pages = 0;
-- collect_pages(&pc);
-- if (list_empty(&pc.pc_pages))
-- continue;
--
-- filp = NULL;
-- down_read(&tracefile_sem);
-- if (tracefile != NULL) {
- filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
- 0600);
- filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE,
- 0600);
-- if (IS_ERR(filp)) {
-- printk("couldn't open %s: %ld\n", tracefile,
-- PTR_ERR(filp));
-- filp = NULL;
-- }
-- }
-- up_read(&tracefile_sem);
-- if (filp == NULL) {
-- put_pages_on_daemon_list(&pc);
-- continue;
-- }
--
-- oldfs = get_fs();
-- set_fs(get_ds());
--
-- /* mark the first header, so we can sort in chunks */
-- page = list_entry(pc.pc_pages.next, struct page,
-- PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- hdr = page_address(page);
-- hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
--
-- list_for_each_safe(pos, tmp, &pc.pc_pages) {
- static loff_t f_pos;
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
-
- if (f_pos >= tracefile_size)
- f_pos = 0;
- else if (f_pos > filp->f_dentry->d_inode->i_size)
- f_pos = filp->f_dentry->d_inode->i_size;
--
-- rc = filp->f_op->write(filp, page_address(page),
- page->index, &f_pos);
- page->index, &filp->f_pos);
-- if (rc != page->index) {
-- printk(KERN_WARNING "wanted to write %lu but "
-- "wrote %d\n", page->index, rc);
-- put_pages_back(&pc);
-- }
-- }
-- set_fs(oldfs);
-- filp_close(filp, 0);
--
-- put_pages_on_daemon_list(&pc);
-- }
-- complete(&tctl->tctl_stop);
-- return 0;
--}
--
--int trace_start_thread(void)
--{
-- struct tracefiled_ctl *tctl = &trace_tctl;
-- int rc = 0;
--
-- down(&trace_thread_sem);
-- if (thread_running)
-- goto out;
--
-- init_completion(&tctl->tctl_start);
-- init_completion(&tctl->tctl_stop);
-- init_waitqueue_head(&tctl->tctl_waitq);
-- atomic_set(&tctl->tctl_shutdown, 0);
--
-- if (kernel_thread(tracefiled, tctl, 0) < 0) {
-- rc = -ECHILD;
-- goto out;
-- }
--
-- wait_for_completion(&tctl->tctl_start);
-- thread_running = 1;
--out:
-- up(&trace_thread_sem);
-- return rc;
--}
--
--void trace_stop_thread(void)
--{
-- struct tracefiled_ctl *tctl = &trace_tctl;
--
-- down(&trace_thread_sem);
-- if (thread_running) {
-- printk(KERN_INFO "Shutting down debug daemon thread...\n");
-- atomic_set(&tctl->tctl_shutdown, 1);
-- wait_for_completion(&tctl->tctl_stop);
-- thread_running = 0;
-- }
-- up(&trace_thread_sem);
--}
--
--int trace_write_daemon_file(struct file *file, const char *buffer,
-- unsigned long count, void *data)
--{
-- char *name;
-- unsigned long off;
-- int rc;
--
-- name = kmalloc(count + 1, GFP_KERNEL);
-- if (name == NULL)
-- return -ENOMEM;
--
-- if (copy_from_user(name, buffer, count)) {
-- rc = -EFAULT;
-- goto out;
-- }
--
-- /* be nice and strip out trailing '\n' */
-- for (off = count ; off > 2 && isspace(name[off - 1]); off--)
-- ;
--
-- name[off] = '\0';
--
-- down_write(&tracefile_sem);
-- if (strcmp(name, "stop") == 0) {
-- tracefile = NULL;
-- trace_stop_thread();
- goto out_sem;
- } else if (strncmp(name, "size=", 5) == 0) {
- tracefile_size = simple_strtoul(name + 5, NULL, 0);
- if (tracefile_size < 10 || tracefile_size > 20480)
- tracefile_size = TRACEFILE_SIZE;
- else
- tracefile_size <<= 20;
-- goto out_sem;
-- }
--
-- if (name[0] != '/') {
-- rc = -EINVAL;
-- goto out_sem;
-- }
--
-- if (tracefile != NULL)
-- kfree(tracefile);
--
-- tracefile = name;
-- name = NULL;
-
- printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
- "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
-
-- trace_start_thread();
--
-- out_sem:
-- up_write(&tracefile_sem);
--
-- out:
- kfree(name);
- if (name)
- kfree(name);
-- return count;
--}
--
--int trace_read_daemon_file(char *page, char **start, off_t off, int count,
-- int *eof, void *data)
--{
-- int rc;
--
-- down_read(&tracefile_sem);
-- rc = snprintf(page, count, "%s", tracefile);
-- up_read(&tracefile_sem);
--
-- return rc;
--}
--
- int trace_write_debug_mb(struct file *file, const char *buffer,
- unsigned long count, void *data)
-int trace_write_debug_size(struct file *file, const char *buffer,
- unsigned long count, void *data)
--{
- char string[32];
- int i;
- unsigned max;
- char *string;
- int rc, i, max;
--
- if (count >= sizeof(string)) {
- printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
- count);
- return -EOVERFLOW;
- }
- string = kmalloc(count + 1, GFP_KERNEL);
- if (string == NULL)
- return -ENOMEM;
--
- if (copy_from_user(string, buffer, count))
- return -EFAULT;
- if (copy_from_user(string, buffer, count)) {
- rc = -EFAULT;
- goto out;
- }
--
-- max = simple_strtoul(string, NULL, 0);
- if (max == 0)
- return -EINVAL;
- if (max == 0) {
- rc = -EINVAL;
- goto out;
- }
-- max /= smp_num_cpus;
--
- if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) {
- if (max > num_physpages / 5 * 4) {
-- printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
- "%d MB, which is more than 80%% of physical RAM "
- "(%lu).\n", max * smp_num_cpus,
- (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
- "%d pages, which is more than 80%% of physical pages "
- "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4);
-- return count;
-- }
-
-- for (i = 0; i < NR_CPUS; i++) {
-- struct trace_cpu_data *tcd;
-- tcd = &trace_data[i].tcd;
- tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
- tcd->tcd_max_pages = max;
-- }
- out:
- kfree(string);
-- return count;
--}
--
- int trace_read_debug_mb(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
- int *eof, void *data)
--{
-- struct trace_cpu_data *tcd;
-- unsigned long flags;
-- int rc;
--
-- tcd = trace_get_tcd(flags);
- rc = snprintf(page, count, "%lu\n",
- tcd->tcd_max_pages * smp_num_cpus << (20 - PAGE_SHIFT));
- rc = snprintf(page, count, "%lu", tcd->tcd_max_pages);
-- trace_put_tcd(tcd, flags);
--
-- return rc;
--}
--
--int tracefile_init(void)
--{
-- struct trace_cpu_data *tcd;
-- int i;
--
-- for (i = 0; i < NR_CPUS; i++) {
-- tcd = &trace_data[i].tcd;
-- INIT_LIST_HEAD(&tcd->tcd_pages);
-- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
-- tcd->tcd_cur_pages = 0;
-- tcd->tcd_cur_daemon_pages = 0;
-- tcd->tcd_max_pages = TCD_MAX_PAGES;
-- tcd->tcd_shutting_down = 0;
-- }
-- return 0;
--}
--
--static void trace_cleanup_on_cpu(void *info)
--{
-- struct trace_cpu_data *tcd;
-- struct list_head *pos, *tmp;
-- unsigned long flags;
--
-- tcd = trace_get_tcd(flags);
--
-- tcd->tcd_shutting_down = 1;
--
-- list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
-- struct page *page;
--
-- page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-- LASSERT(page->index <= PAGE_SIZE);
-- LASSERT(page_count(page) > 0);
--
-- list_del(&PAGE_LIST(page));
-- page->mapping = NULL;
-- __free_page(page);
-- }
-- tcd->tcd_cur_pages = 0;
--
-- trace_put_tcd(tcd, flags);
--}
--
--static void trace_cleanup(void)
--{
-- struct page_collection pc;
--
-- INIT_LIST_HEAD(&pc.pc_pages);
-- spin_lock_init(&pc.pc_lock);
--
-- trace_cleanup_on_cpu(&pc);
-- smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
--}
--
--void tracefile_exit(void)
--{
-- trace_stop_thread();
-- trace_cleanup();
--}
+++ /dev/null
--#ifndef __PORTALS_TRACEFILE_H
--#define __PORTALS_TRACEFILE_H
--
--int tracefile_dump_all_pages(char *filename);
--void trace_debug_print(void);
--void trace_flush_pages(void);
--int trace_start_thread(void);
--void trace_stop_thread(void);
--int tracefile_init(void);
--void tracefile_exit(void);
--int trace_write_daemon_file(struct file *file, const char *buffer,
-- unsigned long count, void *data);
--int trace_read_daemon_file(char *page, char **start, off_t off, int count,
-- int *eof, void *data);
- int trace_write_debug_mb(struct file *file, const char *buffer,
- unsigned long count, void *data);
- int trace_read_debug_mb(char *page, char **start, off_t off, int count,
- int *eof, void *data);
-int trace_write_debug_size(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
- int *eof, void *data);
--int trace_dk(struct file *file, const char *buffer, unsigned long count,
-- void *data);
--
--#endif /* __PORTALS_TRACEFILE_H */
+++ /dev/null
--.deps
--Makefile
--.*.cmd
--autoMakefile.in
--autoMakefile
--*.ko
--*.mod.c
--.*.flags
--.tmp_versions
--.depend
+++ /dev/null
- #MODULES := portals
- #portals-objs := api-eq.o api-init.o api-me.o api-errno.o api-ni.o api-wrap.o
- #portals-objs += lib-dispatch.o lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
- #portals-objs += lib-move.o lib-ni.o lib-pid.o
-MODULES := portals
-portals-objs := api-errno.o api-ni.o api-wrap.o
-portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
-portals-objs += lib-move.o lib-ni.o lib-pid.o module.o
--
--@INCLUDE_RULES@
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--include $(src)/../Kernelenv
--
--obj-y += portals.o
- portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
-portals-objs := lib-eq.o lib-init.o lib-md.o lib-me.o \
-- lib-move.o lib-msg.o lib-ni.o lib-pid.o \
- api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
- api-wrap.o module.o
- api-errno.o api-ni.o api-wrap.o \
- module.o
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * api/api-errno.c
-- * Instantiate the string table of errors
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- */
--
--/* If you change these, you must update the number table in portals/errno.h */
--const char *ptl_err_str[] = {
-- "PTL_OK",
-- "PTL_SEGV",
--
- "PTL_NOSPACE",
- "PTL_INUSE",
- "PTL_NO_SPACE",
- "PTL_ME_IN_USE",
-- "PTL_VAL_FAILED",
--
-- "PTL_NAL_FAILED",
- "PTL_NOINIT",
- "PTL_INIT_DUP",
- "PTL_INIT_INV",
- "PTL_AC_INV_INDEX",
- "PTL_NO_INIT",
- "PTL_IFACE_DUP",
- "PTL_IFACE_INVALID",
--
- "PTL_INV_ASIZE",
- "PTL_INV_HANDLE",
- "PTL_INV_MD",
- "PTL_INV_ME",
- "PTL_INV_NI",
- "PTL_HANDLE_INVALID",
- "PTL_MD_INVALID",
- "PTL_ME_INVALID",
--/* If you change these, you must update the number table in portals/errno.h */
- "PTL_ILL_MD",
- "PTL_INV_PROC",
- "PTL_INV_PSIZE",
- "PTL_INV_PTINDEX",
- "PTL_INV_REG",
- "PTL_PROCESS_INVALID",
- "PTL_PT_INDEX_INVALID",
--
- "PTL_INV_SR_INDX",
- "PTL_ML_TOOLONG",
- "PTL_ADDR_UNKNOWN",
- "PTL_INV_EQ",
- "PTL_SR_INDEX_INVALID",
- "PTL_EQ_INVALID",
-- "PTL_EQ_DROPPED",
--
-- "PTL_EQ_EMPTY",
- "PTL_NOUPDATE",
- "PTL_MD_NO_UPDATE",
-- "PTL_FAIL",
- "PTL_NOT_IMPLEMENTED",
- "PTL_NO_ACK",
--
- "PTL_IOV_TOO_MANY",
- "PTL_IOV_TOO_SMALL",
- "PTL_IOV_INVALID",
--
- "PTL_EQ_INUSE",
- "PTL_EQ_IN_USE",
-
- "PTL_NI_INVALID",
- "PTL_MD_ILLEGAL",
-
- "PTL_MAX_ERRNO"
--};
--/* If you change these, you must update the number table in portals/errno.h */
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * api/api-ni.c
-- * Network Interface code
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
-#define DEBUG_SUBSYSTEM S_PORTALS
--#include <portals/api-support.h>
-
-int ptl_init;
--
--/* Put some magic in the NI handle so uninitialised/zeroed handles are easy
-- * to spot */
--#define NI_HANDLE_MAGIC 0xebc0de00
--#define NI_HANDLE_MASK 0x000000ff
- #define MAX_NIS 8
- static nal_t *ptl_interfaces[MAX_NIS];
- int ptl_num_interfaces = 0;
-
-static struct nal_t *ptl_nal_table[NAL_MAX_NR + 1];
-
-#ifdef __KERNEL__
-DECLARE_MUTEX(ptl_mutex);
-
-static void ptl_mutex_enter (void)
-{
- down (&ptl_mutex);
-}
-
-static void ptl_mutex_exit (void)
-{
- up (&ptl_mutex);
-}
-#else
-static void ptl_mutex_enter (void)
-{
-}
-
-static void ptl_mutex_exit (void)
-{
-}
-#endif
--
--nal_t *ptl_hndl2nal(ptl_handle_any_t *handle)
--{
-- unsigned int idx = handle->nal_idx;
--
-- /* XXX we really rely on the caller NOT racing with interface
-- * setup/teardown. That ensures her NI handle can't get
-- * invalidated out from under her (or worse, swapped for a
-- * completely different interface!) */
-
- LASSERT (ptl_init);
--
-- if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0)
-- return NULL;
--
-- idx &= NI_HANDLE_MASK;
- if (idx < MAX_NIS)
- return ptl_interfaces[idx];
-
- if (idx > NAL_MAX_NR ||
- ptl_nal_table[idx] == NULL ||
- ptl_nal_table[idx]->nal_refct == 0)
- return NULL;
--
- return NULL;
- return ptl_nal_table[idx];
--}
--
- int ptl_ni_init(void)
-int ptl_register_nal (ptl_interface_t interface, nal_t *nal)
--{
- int i;
- int rc;
-
- ptl_mutex_enter();
-
- if (interface < 0 || interface > NAL_MAX_NR)
- rc = PTL_IFACE_INVALID;
- else if (ptl_nal_table[interface] != NULL)
- rc = PTL_IFACE_DUP;
- else {
- rc = PTL_OK;
- ptl_nal_table[interface] = nal;
- LASSERT(nal->nal_refct == 0);
- }
--
- LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1));
- ptl_mutex_exit();
- return (rc);
-}
-
-void ptl_unregister_nal (ptl_interface_t interface)
-{
- LASSERT(interface >= 0 && interface <= NAL_MAX_NR);
- LASSERT(ptl_nal_table[interface] != NULL);
- LASSERT(ptl_nal_table[interface]->nal_refct == 0);
--
- for (i = 0; i < MAX_NIS; i++)
- ptl_interfaces[i] = NULL;
- ptl_mutex_enter();
-
- ptl_nal_table[interface] = NULL;
--
- return PTL_OK;
- ptl_mutex_exit();
--}
--
- void ptl_ni_fini(void)
-int PtlInit(int *max_interfaces)
--{
- int i;
- LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
--
- for (i = 0; i < MAX_NIS; i++) {
- nal_t *nal = ptl_interfaces[i];
- if (!nal)
- continue;
- /* If this assertion fails, we need more bits in NI_HANDLE_MASK and
- * to shift NI_HANDLE_MAGIC left appropriately */
- LASSERT (NAL_MAX_NR < (NI_HANDLE_MASK + 1));
-
- if (max_interfaces != NULL)
- *max_interfaces = NAL_MAX_NR + 1;
--
- if (nal->shutdown)
- nal->shutdown(nal, i);
- }
- }
- ptl_mutex_enter();
--
- #ifdef __KERNEL__
- DECLARE_MUTEX(ptl_ni_init_mutex);
- if (!ptl_init) {
- /* NULL pointers, clear flags */
- memset(ptl_nal_table, 0, sizeof(ptl_nal_table));
-#ifndef __KERNEL__
- /* Kernel NALs register themselves when their module loads,
- * and unregister themselves when their module is unloaded.
- * Userspace NALs, are plugged in explicitly here... */
- {
- extern nal_t procapi_nal;
--
- static void ptl_ni_init_mutex_enter (void)
- {
- down (&ptl_ni_init_mutex);
- }
- /* XXX pretend it's socknal to keep liblustre happy... */
- ptl_nal_table[SOCKNAL] = &procapi_nal;
- LASSERT (procapi_nal.nal_refct == 0);
- }
-#endif
- ptl_init = 1;
- }
--
- static void ptl_ni_init_mutex_exit (void)
- {
- up (&ptl_ni_init_mutex);
- ptl_mutex_exit();
-
- return PTL_OK;
--}
--
- #else
- static void ptl_ni_init_mutex_enter (void)
-void PtlFini(void)
--{
- }
- nal_t *nal;
- int i;
--
- static void ptl_ni_init_mutex_exit (void)
- {
- }
- ptl_mutex_enter();
--
- #endif
- if (ptl_init) {
- for (i = 0; i <= NAL_MAX_NR; i++) {
--
- int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size,
- ptl_ac_index_t acl_size, ptl_pid_t requested_pid,
- ptl_handle_ni_t * handle)
- nal = ptl_nal_table[i];
- if (nal == NULL)
- continue;
-
- if (nal->nal_refct != 0) {
- CWARN("NAL %d has outstanding refcount %d\n",
- i, nal->nal_refct);
- nal->nal_ni_fini(nal);
- }
-
- ptl_nal_table[i] = NULL;
- }
-
- ptl_init = 0;
- }
-
- ptl_mutex_exit();
-}
-
-int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
- ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits,
- ptl_handle_ni_t *handle)
--{
-- nal_t *nal;
- int i;
- int i;
- int rc;
--
-- if (!ptl_init)
- return PTL_NOINIT;
-
- ptl_ni_init_mutex_enter ();
- return PTL_NO_INIT;
--
- nal = interface(ptl_num_interfaces, ptl_size, acl_size, requested_pid);
- ptl_mutex_enter ();
--
- if (!nal) {
- ptl_ni_init_mutex_exit ();
- return PTL_NAL_FAILED;
- if (interface == PTL_IFACE_DEFAULT) {
- for (i = 0; i <= NAL_MAX_NR; i++)
- if (ptl_nal_table[i] != NULL) {
- interface = i;
- break;
- }
- /* NB if no interfaces are registered, 'interface' will
- * fail the valid test below */
-- }
-
- for (i = 0; i < ptl_num_interfaces; i++) {
- if (ptl_interfaces[i] == nal) {
- nal->refct++;
- handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i;
- CDEBUG(D_OTHER, "Returning existing NAL (%d)\n", i);
- ptl_ni_init_mutex_exit ();
- return PTL_OK;
- }
-
- if (interface < 0 ||
- interface > NAL_MAX_NR ||
- ptl_nal_table[interface] == NULL) {
- GOTO(out, rc = PTL_IFACE_INVALID);
-- }
- nal->refct = 1;
--
- if (ptl_num_interfaces >= MAX_NIS) {
- if (nal->shutdown)
- nal->shutdown (nal, ptl_num_interfaces);
- ptl_ni_init_mutex_exit ();
- return PTL_NOSPACE;
- }
- nal = ptl_nal_table[interface];
- nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
- nal->nal_handle.cookie = 0;
-
- CDEBUG(D_OTHER, "Starting up NAL (%d) refs %d\n", interface, nal->nal_refct);
- rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits);
--
- handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces;
- ptl_interfaces[ptl_num_interfaces++] = nal;
- if (rc != PTL_OK) {
- CERROR("Error %d starting up NAL %d, refs %d\n", rc,
- interface, nal->nal_refct);
- GOTO(out, rc);
- }
-
- if (nal->nal_refct != 0) {
- /* Caller gets to know if this was the first ref or not */
- rc = PTL_IFACE_DUP;
- }
-
- nal->nal_refct++;
- *handle = nal->nal_handle;
--
- ptl_eq_ni_init(nal);
- ptl_me_ni_init(nal);
- out:
- ptl_mutex_exit ();
--
- ptl_ni_init_mutex_exit ();
- return PTL_OK;
- return rc;
--}
-
--
--int PtlNIFini(ptl_handle_ni_t ni)
--{
-- nal_t *nal;
- int idx;
- int rc;
- int idx;
--
-- if (!ptl_init)
- return PTL_NOINIT;
- return PTL_NO_INIT;
--
- ptl_ni_init_mutex_enter ();
- ptl_mutex_enter ();
--
-- nal = ptl_hndl2nal (&ni);
-- if (nal == NULL) {
- ptl_ni_init_mutex_exit ();
- return PTL_INV_HANDLE;
- ptl_mutex_exit ();
- return PTL_HANDLE_INVALID;
-- }
--
-- idx = ni.nal_idx & NI_HANDLE_MASK;
-
- nal->refct--;
- if (nal->refct > 0) {
- ptl_ni_init_mutex_exit ();
- return PTL_OK;
- }
-
- ptl_me_ni_fini(nal);
- ptl_eq_ni_fini(nal);
-
- rc = PTL_OK;
- if (nal->shutdown)
- rc = nal->shutdown(nal, idx);
--
- ptl_interfaces[idx] = NULL;
- ptl_num_interfaces--;
- LASSERT(nal->nal_refct > 0);
--
- ptl_ni_init_mutex_exit ();
- return rc;
- }
- nal->nal_refct--;
--
- int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out)
- {
- *ni_out = handle_in;
- /* nal_refct == 0 tells nal->shutdown to really shut down */
- nal->nal_ni_fini(nal);
--
- ptl_mutex_exit ();
-- return PTL_OK;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * api/api-wrap.c
-- * User-level wrappers that dispatch across the protection boundaries
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--# define DEBUG_SUBSYSTEM S_PORTALS
--#include <portals/api-support.h>
--
- static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf,
- int argsize, void *retbuf, int retsize)
-void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
--{
- nal_t *nal;
-
- if (!ptl_init) {
- CERROR("Not initialized\n");
- return PTL_NOINIT;
- }
-
- nal = ptl_hndl2nal(&any_h);
- if (!nal)
- return PTL_INV_HANDLE;
-
- nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize);
- snprintf(str, len, "0x%lx."LPX64, h.nal_idx, h.cookie);
-}
--
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out)
-{
- if (!ptl_init)
- return PTL_NO_INIT;
-
- if (ptl_hndl2nal(&handle_in) == NULL)
- return PTL_HANDLE_INVALID;
-
- *ni_out = handle_in;
-- return PTL_OK;
--}
--
--int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
--{
- PtlGetId_in args;
- PtlGetId_out ret;
- int rc;
-
- args.handle_in = ni_handle;
- nal_t *nal;
--
- rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return rc;
- if (!ptl_init)
- return PTL_NO_INIT;
--
- if (id)
- *id = ret.id_out;
- nal = ptl_hndl2nal(&ni_handle);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- return ret.rc;
- return nal->nal_get_id(nal, id);
--}
--
- int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
-int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid)
--{
- PtlFailNid_in args;
- PtlFailNid_out ret;
- int rc;
-
- args.interface = interface;
- args.nid = nid;
- args.threshold = threshold;
- nal_t *nal;
-
- if (!ptl_init)
- return PTL_NO_INIT;
--
- rc = do_forward (interface, PTL_FAILNID,
- &args, sizeof(args), &ret, sizeof (ret));
- nal = ptl_hndl2nal(&ni_handle);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- return ((rc != PTL_OK) ? rc : ret.rc);
- /* We don't support different uids yet */
- *uid = 0;
- return PTL_OK;
--}
--
- int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
- ptl_sr_value_t * status_out)
-int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
--{
- PtlNIStatus_in args;
- PtlNIStatus_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.register_in = register_in;
-
- rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
- nal_t *nal;
--
- if (status_out)
- *status_out = ret.status_out;
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&interface);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- return ret.rc;
- return nal->nal_fail_nid(nal, nid, threshold);
--}
--
- int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
- unsigned long *distance_out)
-int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
- ptl_sr_value_t *status_out)
--{
- PtlNIDist_in args;
- PtlNIDist_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.process_in = process_in;
-
- rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
- nal_t *nal;
--
- if (distance_out)
- *distance_out = ret.distance_out;
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&interface_in);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- return ret.rc;
- return nal->nal_ni_status(nal, register_in, status_out);
--}
-
-
--
- unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in)
-int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
- unsigned long *distance_out)
--{
- PtlNIDebug_in args;
- PtlNIDebug_out ret;
- int rc;
-
- args.mask_in = mask_in;
-
- rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret,
- sizeof(ret));
- nal_t *nal;
--
- if (rc != PTL_OK)
- return rc;
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&interface_in);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- return ret.rc;
- return nal->nal_ni_dist(nal, &process_in, distance_out);
--}
--
--int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
-- ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
-- ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
- ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out)
- ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out)
--{
- PtlMEAttach_in args;
- PtlMEAttach_out ret;
- int rc;
-
- args.interface_in = interface_in;
- args.index_in = index_in;
- args.match_id_in = match_id_in;
- args.match_bits_in = match_bits_in;
- args.ignore_bits_in = ignore_bits_in;
- args.unlink_in = unlink_in;
- args.position_in = pos_in;
-
- rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
- nal_t *nal;
--
- if (handle_out) {
- handle_out->nal_idx = interface_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&interface_in);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- return ret.rc;
- return nal->nal_me_attach(nal, index_in, match_id_in,
- match_bits_in, ignore_bits_in,
- unlink_in, pos_in, handle_out);
--}
--
--int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
-- ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
-- ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
-- ptl_handle_me_t * handle_out)
--{
- PtlMEInsert_in args;
- PtlMEInsert_out ret;
- int rc;
-
- args.current_in = current_in;
- args.match_id_in = match_id_in;
- args.match_bits_in = match_bits_in;
- args.ignore_bits_in = ignore_bits_in;
- args.unlink_in = unlink_in;
- args.position_in = position_in;
-
- rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret,
- sizeof(ret));
- nal_t *nal;
--
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(¤t_in);
- if (nal == NULL)
- return PTL_ME_INVALID;
--
- if (handle_out) {
- handle_out->nal_idx = current_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
- return nal->nal_me_insert(nal, ¤t_in, match_id_in,
- match_bits_in, ignore_bits_in,
- unlink_in, position_in, handle_out);
--}
--
--int PtlMEUnlink(ptl_handle_me_t current_in)
- {
- PtlMEUnlink_in args;
- PtlMEUnlink_out ret;
- int rc;
-
- args.current_in = current_in;
- args.unlink_in = PTL_RETAIN;
-
- rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
-
- return ret.rc;
- }
-
- int PtlTblDump(ptl_handle_ni_t ni, int index_in)
- {
- PtlTblDump_in args;
- PtlTblDump_out ret;
- int rc;
-
- args.index_in = index_in;
-
- rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return rc;
-
- return ret.rc;
- }
-
- int PtlMEDump(ptl_handle_me_t current_in)
- {
- PtlMEDump_in args;
- PtlMEDump_out ret;
- int rc;
-
- args.current_in = current_in;
-
- rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret,
- sizeof(ret));
-
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
-
- return ret.rc;
- }
-
- static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in)
--{
- nal_t *nal;
- int rc;
- int i;
-
- if (!ptl_init) {
- CERROR("PtlMDAttach/Bind/Update: Not initialized\n");
- return PTL_NOINIT;
- }
- nal_t *nal;
--
- if (!ptl_init)
- return PTL_NO_INIT;
-
-- nal = ptl_hndl2nal(¤t_in);
- if (!nal)
- return PTL_INV_HANDLE;
-
- if (nal->validate != NULL) /* nal->validate not a NOOP */
- {
- if ((md_in.options & PTL_MD_IOV) == 0) /* contiguous */
- {
- rc = nal->validate (nal, md_in.start, md_in.length);
- if (rc)
- return (PTL_SEGV);
- }
- else
- {
- struct iovec *iov = (struct iovec *)md_in.start;
-
- for (i = 0; i < md_in.niov; i++, iov++)
- {
- rc = nal->validate (nal, iov->iov_base, iov->iov_len);
- if (rc)
- return (PTL_SEGV);
- }
- }
- }
-
- return 0;
- }
- if (nal == NULL)
- return PTL_ME_INVALID;
--
- static ptl_handle_eq_t md2eq (ptl_md_t *md)
- {
- if (PtlHandleEqual (md->eventq, PTL_EQ_NONE))
- return (PTL_EQ_NONE);
-
- return (ptl_handle2usereq (&md->eventq)->cb_eq_handle);
- return nal->nal_me_unlink(nal, ¤t_in);
--}
-
--
--int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
-- ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
--{
- PtlMDAttach_in args;
- PtlMDAttach_out ret;
- int rc;
- nal_t *nal;
--
- rc = validate_md(me_in, md_in);
- if (rc == PTL_OK) {
- args.eq_in = md2eq(&md_in);
- args.me_in = me_in;
- args.md_in = md_in;
- args.unlink_in = unlink_in;
-
- rc = do_forward(me_in, PTL_MDATTACH,
- &args, sizeof(args), &ret, sizeof(ret));
- }
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&me_in);
- if (nal == NULL)
- return PTL_ME_INVALID;
--
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc;
- if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE) &&
- ptl_hndl2nal(&md_in.eq_handle) != nal)
- return PTL_MD_ILLEGAL;
--
- if (handle_out) {
- handle_out->nal_idx = me_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
- return (nal->nal_md_attach)(nal, &me_in, &md_in,
- unlink_in, handle_out);
--}
-
-
--
--int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
- ptl_handle_md_t * handle_out)
- ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out)
--{
- PtlMDBind_in args;
- PtlMDBind_out ret;
- int rc;
-
- rc = validate_md(ni_in, md_in);
- if (rc != PTL_OK)
- return rc;
-
- args.eq_in = md2eq(&md_in);
- args.ni_in = ni_in;
- args.md_in = md_in;
- nal_t *nal;
--
- rc = do_forward(ni_in, PTL_MDBIND,
- &args, sizeof(args), &ret, sizeof(ret));
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&ni_in);
- if (nal == NULL)
- return PTL_NI_INVALID;
--
- if (rc != PTL_OK)
- return rc;
- if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE) &&
- ptl_hndl2nal(&md_in.eq_handle) != nal)
- return PTL_MD_ILLEGAL;
--
- if (handle_out) {
- handle_out->nal_idx = ni_in.nal_idx;
- handle_out->cookie = ret.handle_out.cookie;
- }
- return ret.rc;
- return (nal->nal_md_bind)(nal, &md_in, unlink_in, handle_out);
--}
--
--int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
-- ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
--{
- PtlMDUpdate_internal_in args;
- PtlMDUpdate_internal_out ret;
- int rc;
-
- args.md_in = md_in;
-
- if (old_inout) {
- args.old_inout = *old_inout;
- args.old_inout_valid = 1;
- } else
- args.old_inout_valid = 0;
-
- if (new_inout) {
- rc = validate_md (md_in, *new_inout);
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
- args.new_inout = *new_inout;
- args.new_inout_valid = 1;
- } else
- args.new_inout_valid = 0;
-
- if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) {
- args.testq_in = PTL_EQ_NONE;
- args.sequence_in = -1;
- } else {
- ptl_eq_t *eq = ptl_handle2usereq (&testq_in);
-
- args.testq_in = eq->cb_eq_handle;
- args.sequence_in = eq->sequence;
- }
-
- rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
- nal_t *nal;
-
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&md_in);
- if (nal == NULL)
- return PTL_MD_INVALID;
--
- if (old_inout)
- *old_inout = ret.old_inout;
- if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE) &&
- ptl_hndl2nal(&testq_in) != nal)
- return PTL_EQ_INVALID;
--
- return ret.rc;
- return (nal->nal_md_update)(nal, &md_in,
- old_inout, new_inout, &testq_in);
--}
--
--int PtlMDUnlink(ptl_handle_md_t md_in)
--{
- PtlMDUnlink_in args;
- PtlMDUnlink_out ret;
- int rc;
-
- args.md_in = md_in;
- rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc;
-
- return ret.rc;
- nal_t *nal;
-
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&md_in);
- if (nal == NULL)
- return PTL_MD_INVALID;
-
- return (nal->nal_md_unlink)(nal, &md_in);
--}
--
--int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
- int (*callback) (ptl_event_t * event),
- ptl_handle_eq_t * handle_out)
- ptl_eq_handler_t callback,
- ptl_handle_eq_t *handle_out)
--{
- ptl_eq_t *eq = NULL;
- ptl_event_t *ev = NULL;
- PtlEQAlloc_in args;
- PtlEQAlloc_out ret;
- int rc, i;
- nal_t *nal;
-
- nal_t *nal;
-
-- if (!ptl_init)
- return PTL_NOINIT;
- return PTL_NO_INIT;
--
- nal = ptl_hndl2nal (&interface);
- nal = ptl_hndl2nal(&interface);
-- if (nal == NULL)
- return PTL_INV_HANDLE;
-
- if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
- do { /* knock off all but the top bit... */
- count &= ~LOWEST_BIT_SET (count);
- } while (count != LOWEST_BIT_SET(count));
-
- count <<= 1; /* ...and round up */
- }
-
- if (count == 0) /* catch bad parameter / overflow on roundup */
- return (PTL_VAL_FAILED);
-
- PORTAL_ALLOC(ev, count * sizeof(ptl_event_t));
- if (!ev)
- return PTL_NOSPACE;
-
- for (i = 0; i < count; i++)
- ev[i].sequence = 0;
-
- if (nal->validate != NULL) {
- rc = nal->validate(nal, ev, count * sizeof(ptl_event_t));
- if (rc != PTL_OK)
- goto fail;
- }
-
- args.ni_in = interface;
- args.count_in = count;
- args.base_in = ev;
- args.len_in = count * sizeof(*ev);
- args.callback_in = callback;
-
- rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret,
- sizeof(ret));
- if (rc != PTL_OK)
- goto fail;
- if (ret.rc)
- GOTO(fail, rc = ret.rc);
- return PTL_NI_INVALID;
--
- PORTAL_ALLOC(eq, sizeof(*eq));
- if (!eq) {
- rc = PTL_NOSPACE;
- goto fail;
- }
- return (nal->nal_eq_alloc)(nal, count, callback, handle_out);
-}
--
- eq->sequence = 1;
- eq->size = count;
- eq->base = ev;
-int PtlEQFree(ptl_handle_eq_t eventq)
-{
- nal_t *nal;
--
- /* EQ handles are a little wierd. PtlEQGet() just looks at the
- * queued events in shared memory. It doesn't want to do_forward()
- * at all, so the cookie in the EQ handle we pass out of here is
- * simply a pointer to the event queue we just set up. We stash
- * the handle returned by do_forward(), so we can pass it back via
- * do_forward() when we need to. */
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&eventq);
- if (nal == NULL)
- return PTL_EQ_INVALID;
--
- eq->cb_eq_handle.nal_idx = interface.nal_idx;
- eq->cb_eq_handle.cookie = ret.handle_out.cookie;
- return (nal->nal_eq_free)(nal, &eventq);
-}
--
- handle_out->nal_idx = interface.nal_idx;
- handle_out->cookie = (__u64)((unsigned long)eq);
- return PTL_OK;
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
-{
- int which;
-
- return (PtlEQPoll (&eventq, 1, 0, ev, &which));
-}
--
- fail:
- PORTAL_FREE(ev, count * sizeof(ptl_event_t));
- return rc;
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
-{
- int which;
-
- return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER,
- event_out, &which));
--}
--
- int PtlEQFree(ptl_handle_eq_t eventq)
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
- ptl_event_t *event_out, int *which_out)
--{
- PtlEQFree_in args;
- PtlEQFree_out ret;
- ptl_eq_t *eq;
- int rc;
- int i;
- nal_t *nal;
--
- eq = ptl_handle2usereq (&eventq);
- args.eventq_in = eq->cb_eq_handle;
- if (!ptl_init)
- return PTL_NO_INIT;
--
- rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args,
- sizeof(args), &ret, sizeof(ret));
- if (neq_in < 1)
- return PTL_EQ_INVALID;
--
- /* XXX we're betting rc == PTL_OK here */
- PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t));
- PORTAL_FREE(eq, sizeof(*eq));
- nal = ptl_hndl2nal(&eventqs_in[0]);
- if (nal == NULL)
- return PTL_EQ_INVALID;
--
- return rc;
- for (i = 1; i < neq_in; i++)
- if (ptl_hndl2nal(&eventqs_in[i]) != nal)
- return PTL_EQ_INVALID;
-
- return (nal->nal_eq_poll)(nal, eventqs_in, neq_in, timeout,
- event_out, which_out);
--}
-
--
--int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
-- ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
--{
- PtlACEntry_in args;
- PtlACEntry_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.ni_in = ni_in;
- args.index_in = index_in;
- args.match_id_in = match_id_in;
- args.portal_in = portal_in;
-
- rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret,
- sizeof(ret));
- nal_t *nal;
--
- return (rc != PTL_OK) ? rc : ret.rc;
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&ni_in);
- if (nal == NULL)
- return PTL_NI_INVALID;
-
- return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in);
--}
--
--int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
-- ptl_process_id_t target_in, ptl_pt_index_t portal_in,
- ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
- ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in,
-- ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
--{
- PtlPut_in args;
- PtlPut_out ret;
- int rc;
-
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.md_in = md_in;
- args.ack_req_in = ack_req_in;
- args.target_in = target_in;
- args.portal_in = portal_in;
- args.cookie_in = cookie_in;
- args.match_bits_in = match_bits_in;
- args.offset_in = offset_in;
- args.hdr_data_in = hdr_data_in;
- nal_t *nal;
--
- rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret));
- if (!ptl_init)
- return PTL_NO_INIT;
-
- nal = ptl_hndl2nal(&md_in);
- if (nal == NULL)
- return PTL_MD_INVALID;
--
- return (rc != PTL_OK) ? rc : ret.rc;
- return (nal->nal_put)(nal, &md_in, ack_req_in,
- &target_in, portal_in, ac_in,
- match_bits_in, offset_in, hdr_data_in);
--}
--
--int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
- ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
- ptl_pt_index_t portal_in, ptl_ac_index_t ac_in,
-- ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
--{
- PtlGet_in args;
- PtlGet_out ret;
- int rc;
- nal_t *nal;
--
- /*
- * Copy arguments into the argument block to
- * hand to the forwarding object
- */
- args.md_in = md_in;
- args.target_in = target_in;
- args.portal_in = portal_in;
- args.cookie_in = cookie_in;
- args.match_bits_in = match_bits_in;
- args.offset_in = offset_in;
- if (!ptl_init)
- return PTL_NO_INIT;
--
- rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret));
- nal = ptl_hndl2nal(&md_in);
- if (nal == NULL)
- return PTL_MD_INVALID;
--
- return (rc != PTL_OK) ? rc : ret.rc;
- return (nal->nal_get)(nal, &md_in,
- &target_in, portal_in, ac_in,
- match_bits_in, offset_in);
--}
-
+++ /dev/null
--# Copyright (C) 2002 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
- my_sources = api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c \
- lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c \
-my_sources = api-errno.c api-ni.c api-wrap.c \
- lib-init.c lib-me.c lib-msg.c lib-eq.c \
-- lib-md.c lib-move.c lib-ni.c lib-pid.c
--
--if !CRAY_PORTALS
--
--if LIBLUSTRE
--noinst_LIBRARIES= libportals.a
--libportals_a_SOURCES= $(my_sources)
--libportals_a_CPPFLAGS = $(LLCPPFLAGS)
--libportals_a_CFLAGS = $(LLCFLAGS)
--endif
--
- #if MODULES
- #modulenet_DATA = portals$(KMODEXT)
- #endif # MODULES
-if MODULES
-modulenet_DATA = portals$(KMODEXT)
-endif # MODULES
--
--endif # CRAY_PORTALS
--
--MOSTLYCLEANFILES = *.o *.ko *.mod.c
- #DIST_SOURCES = $(portals-objs:%.o=%.c)
-DIST_SOURCES = $(portals-objs:%.o=%.c)
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-eq.c
-- * Library level Event queue management routines
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#define DEBUG_SUBSYSTEM S_PORTALS
--#include <portals/lib-p30.h>
- #include <portals/arg-blocks.h>
--
- int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
-int
-lib_api_eq_alloc (nal_t *apinal, ptl_size_t count,
- ptl_eq_handler_t callback,
- ptl_handle_eq_t *handle)
--{
- /*
- * Incoming:
- * ptl_handle_ni_t ni_in
- * ptl_size_t count_in
- * void * base_in
- *
- * Outgoing:
- * ptl_handle_eq_t * handle_out
- */
- lib_nal_t *nal = apinal->nal_data;
- lib_eq_t *eq;
- unsigned long flags;
- int rc;
--
- PtlEQAlloc_in *args = v_args;
- PtlEQAlloc_out *ret = v_ret;
- /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
- * overflow, they don't skip entries, so the queue has the same
- * apparant capacity at all times */
--
- lib_eq_t *eq;
- unsigned long flags;
- if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */
- do { /* knock off all but the top bit... */
- count &= ~LOWEST_BIT_SET (count);
- } while (count != LOWEST_BIT_SET(count));
--
- /* api should have rounded up */
- if (args->count_in != LOWEST_BIT_SET (args->count_in))
- return ret->rc = PTL_VAL_FAILED;
- count <<= 1; /* ...and round up */
- }
--
- if (count == 0) /* catch bad parameter / overflow on roundup */
- return (PTL_VAL_FAILED);
-
-- eq = lib_eq_alloc (nal);
-- if (eq == NULL)
- return (ret->rc = PTL_NOSPACE);
- return (PTL_NO_SPACE);
--
- state_lock(nal, &flags);
- PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t));
- if (eq->eq_events == NULL) {
- LIB_LOCK(nal, flags);
- lib_eq_free (nal, eq);
- LIB_UNLOCK(nal, flags);
- }
--
- if (nal->cb_map != NULL) {
- if (nal->libnal_map != NULL) {
-- struct iovec iov = {
- .iov_base = args->base_in,
- .iov_len = args->count_in * sizeof (ptl_event_t) };
- .iov_base = eq->eq_events,
- .iov_len = count * sizeof(ptl_event_t)};
--
- ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey);
- if (ret->rc != PTL_OK) {
- rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey);
- if (rc != PTL_OK) {
- LIB_LOCK(nal, flags);
-- lib_eq_free (nal, eq);
-
- state_unlock (nal, &flags);
- return (ret->rc);
- LIB_UNLOCK(nal, flags);
- return (rc);
-- }
-- }
--
- eq->sequence = 1;
- eq->base = args->base_in;
- eq->size = args->count_in;
- /* NB this resets all event sequence numbers to 0, to be earlier
- * than eq_deq_seq */
- memset(eq->eq_events, 0, count * sizeof(ptl_event_t));
-
- eq->eq_deq_seq = 1;
- eq->eq_enq_seq = 1;
- eq->eq_size = count;
-- eq->eq_refcount = 0;
- eq->event_callback = args->callback_in;
- eq->eq_callback = callback;
-
- LIB_LOCK(nal, flags);
--
-- lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ);
- list_add (&eq->eq_list, &nal->ni.ni_active_eqs);
- list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- ptl_eq2handle(&ret->handle_out, eq);
- return (ret->rc = PTL_OK);
- ptl_eq2handle(handle, nal, eq);
- return (PTL_OK);
--}
--
- int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
-int
-lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh)
--{
- /*
- * Incoming:
- * ptl_handle_eq_t eventq_in
- *
- * Outgoing:
- */
-
- PtlEQFree_in *args = v_args;
- PtlEQFree_out *ret = v_ret;
- lib_eq_t *eq;
- long flags;
- lib_nal_t *nal = apinal->nal_data;
- lib_eq_t *eq;
- int size;
- ptl_event_t *events;
- void *addrkey;
- unsigned long flags;
--
- state_lock (nal, &flags);
- LIB_LOCK(nal, flags);
--
- eq = ptl_handle2eq(&args->eventq_in, nal);
- eq = ptl_handle2eq(eqh, nal);
-- if (eq == NULL) {
- ret->rc = PTL_INV_EQ;
- } else if (eq->eq_refcount != 0) {
- ret->rc = PTL_EQ_INUSE;
- LIB_UNLOCK(nal, flags);
- return (PTL_EQ_INVALID);
- }
-
- if (eq->eq_refcount != 0) {
- LIB_UNLOCK(nal, flags);
- return (PTL_EQ_IN_USE);
- }
-
- /* stash for free after lock dropped */
- events = eq->eq_events;
- size = eq->eq_size;
- addrkey = eq->eq_addrkey;
-
- lib_invalidate_handle (nal, &eq->eq_lh);
- list_del (&eq->eq_list);
- lib_eq_free (nal, eq);
-
- LIB_UNLOCK(nal, flags);
-
- if (nal->libnal_unmap != NULL) {
- struct iovec iov = {
- .iov_base = events,
- .iov_len = size * sizeof(ptl_event_t)};
-
- nal->libnal_unmap(nal, 1, &iov, &addrkey);
- }
-
- PORTAL_FREE(events, size * sizeof (ptl_event_t));
-
- return (PTL_OK);
-}
-
-int
-lib_get_event (lib_eq_t *eq, ptl_event_t *ev)
-{
- int new_index = eq->eq_deq_seq & (eq->eq_size - 1);
- ptl_event_t *new_event = &eq->eq_events[new_index];
- int rc;
- ENTRY;
-
- CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
- new_event, eq->eq_deq_seq, eq->eq_size);
-
- if (PTL_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) {
- RETURN(PTL_EQ_EMPTY);
- }
-
- /* We've got a new event... */
- *ev = *new_event;
-
- /* ...but did it overwrite an event we've not seen yet? */
- if (eq->eq_deq_seq == new_event->sequence) {
- rc = PTL_OK;
-- } else {
- if (nal->cb_unmap != NULL) {
- struct iovec iov = {
- .iov_base = eq->base,
- .iov_len = eq->size * sizeof (ptl_event_t) };
-
- nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey);
- CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n",
- eq->eq_deq_seq, new_event->sequence);
- rc = PTL_EQ_DROPPED;
- }
-
- eq->eq_deq_seq = new_event->sequence + 1;
- RETURN(rc);
-}
-
-
-int
-lib_api_eq_poll (nal_t *apinal,
- ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
- ptl_event_t *event, int *which)
-{
- lib_nal_t *nal = apinal->nal_data;
- lib_ni_t *ni = &nal->libnal_ni;
- unsigned long flags;
- int i;
- int rc;
-#ifdef __KERNEL__
- wait_queue_t wq;
- unsigned long now;
-#else
- struct timeval then;
- struct timeval now;
- struct timespec ts;
-#endif
- ENTRY;
-
- LIB_LOCK(nal, flags);
-
- for (;;) {
- for (i = 0; i < neq; i++) {
- lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal);
-
- rc = lib_get_event (eq, event);
- if (rc != PTL_EQ_EMPTY) {
- LIB_UNLOCK(nal, flags);
- *which = i;
- RETURN(rc);
- }
- }
-
- if (timeout_ms == 0) {
- LIB_UNLOCK (nal, flags);
- RETURN (PTL_EQ_EMPTY);
-- }
--
- lib_invalidate_handle (nal, &eq->eq_lh);
- list_del (&eq->eq_list);
- lib_eq_free (nal, eq);
- ret->rc = PTL_OK;
- }
- /* Some architectures force us to do spin locking/unlocking
- * in the same stack frame, means we can abstract the
- * locking here */
-#ifdef __KERNEL__
- init_waitqueue_entry(&wq, current);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&ni->ni_waitq, &wq);
--
- state_unlock (nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- return (ret->rc);
- if (timeout_ms < 0) {
- schedule ();
- } else {
- now = jiffies;
- schedule_timeout((timeout_ms * HZ)/1000);
- timeout_ms -= ((jiffies - now) * 1000)/HZ;
- if (timeout_ms < 0)
- timeout_ms = 0;
- }
-
- LIB_LOCK(nal, flags);
-#else
- if (timeout_ms < 0) {
- pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
- } else {
- gettimeofday(&then, NULL);
-
- ts.tv_sec = then.tv_sec + timeout_ms/1000;
- ts.tv_nsec = then.tv_usec * 1000 +
- (timeout_ms%1000) * 1000000;
- if (ts.tv_nsec >= 1000000000) {
- ts.tv_sec++;
- ts.tv_nsec -= 1000000000;
- }
-
- pthread_cond_timedwait(&ni->ni_cond,
- &ni->ni_mutex, &ts);
-
- gettimeofday(&now, NULL);
- timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
- (now.tv_usec - then.tv_usec) / 1000;
-
- if (timeout_ms < 0)
- timeout_ms = 0;
- }
-#endif
- }
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-init.c
-- * Start up the internal library and clear all structures
-- * Called by the NAL when it initializes. Safe to call multiple times.
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--# define DEBUG_SUBSYSTEM S_PORTALS
--#include <portals/lib-p30.h>
--
--#ifdef __KERNEL__
--# include <linux/string.h> /* for memset() */
--# include <linux/kp30.h>
--# ifdef KERNEL_ADDR_CACHE
--# include <compute/OS/addrCache/cache.h>
--# endif
--#else
--# include <string.h>
--# include <sys/time.h>
--#endif
--
--#ifndef PTL_USE_LIB_FREELIST
--
--int
- kportal_descriptor_setup (nal_cb_t *nal)
-kportal_descriptor_setup (lib_nal_t *nal,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
- /* Ignore requested limits! */
- actual_limits->max_mes = INT_MAX;
- actual_limits->max_mds = INT_MAX;
- actual_limits->max_eqs = INT_MAX;
-
-- return PTL_OK;
--}
--
--void
- kportal_descriptor_cleanup (nal_cb_t *nal)
-kportal_descriptor_cleanup (lib_nal_t *nal)
--{
--}
--#else
--
--int
- lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size)
-lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size)
--{
-- char *space;
--
-- LASSERT (n > 0);
--
-- size += offsetof (lib_freeobj_t, fo_contents);
--
- space = nal->cb_malloc (nal, n * size);
- PORTAL_ALLOC(space, n * size);
-- if (space == NULL)
- return (PTL_NOSPACE);
- return (PTL_NO_SPACE);
--
-- INIT_LIST_HEAD (&fl->fl_list);
-- fl->fl_objs = space;
-- fl->fl_nobjs = n;
-- fl->fl_objsize = size;
--
-- do
-- {
-- memset (space, 0, size);
-- list_add ((struct list_head *)space, &fl->fl_list);
-- space += size;
-- } while (--n != 0);
--
-- return (PTL_OK);
--}
--
--void
- lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl)
-lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl)
--{
-- struct list_head *el;
-- int count;
--
-- if (fl->fl_nobjs == 0)
-- return;
--
-- count = 0;
-- for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
-- count++;
--
-- LASSERT (count == fl->fl_nobjs);
--
- nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
- PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
-- memset (fl, 0, sizeof (fl));
--}
--
--int
- kportal_descriptor_setup (nal_cb_t *nal)
-kportal_descriptor_setup (lib_nal_t *nal,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
-- /* NB on failure caller must still call kportal_descriptor_cleanup */
-- /* ****** */
- int rc;
- lib_ni_t *ni = &nal->libnal_ni;
- int rc;
--
- memset (&nal->ni.ni_free_mes, 0, sizeof (nal->ni.ni_free_mes));
- memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs));
- memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds));
- memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs));
- memset (&ni->ni_free_mes, 0, sizeof (ni->ni_free_mes));
- memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
- memset (&ni->ni_free_mds, 0, sizeof (ni->ni_free_mds));
- memset (&ni->ni_free_eqs, 0, sizeof (ni->ni_free_eqs));
--
- rc = lib_freelist_init (nal, &nal->ni.ni_free_mes,
- /* Ignore requested limits! */
- actual_limits->max_mes = MAX_MES;
- actual_limits->max_mds = MAX_MDS;
- actual_limits->max_eqs = MAX_EQS;
- /* Hahahah what a load of bollocks. There's nowhere to
- * specify the max # messages in-flight */
-
- rc = lib_freelist_init (nal, &ni->ni_free_mes,
-- MAX_MES, sizeof (lib_me_t));
-- if (rc != PTL_OK)
-- return (rc);
--
- rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs,
- rc = lib_freelist_init (nal, &ni->ni_free_msgs,
-- MAX_MSGS, sizeof (lib_msg_t));
-- if (rc != PTL_OK)
-- return (rc);
--
- rc = lib_freelist_init (nal, &nal->ni.ni_free_mds,
- rc = lib_freelist_init (nal, &ni->ni_free_mds,
-- MAX_MDS, sizeof (lib_md_t));
-- if (rc != PTL_OK)
-- return (rc);
--
- rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs,
- rc = lib_freelist_init (nal, &ni->ni_free_eqs,
-- MAX_EQS, sizeof (lib_eq_t));
-- return (rc);
--}
--
--void
- kportal_descriptor_cleanup (nal_cb_t *nal)
-kportal_descriptor_cleanup (lib_nal_t *nal)
--{
- lib_freelist_fini (nal, &nal->ni.ni_free_mes);
- lib_freelist_fini (nal, &nal->ni.ni_free_msgs);
- lib_freelist_fini (nal, &nal->ni.ni_free_mds);
- lib_freelist_fini (nal, &nal->ni.ni_free_eqs);
- lib_ni_t *ni = &nal->libnal_ni;
-
- lib_freelist_fini (nal, &ni->ni_free_mes);
- lib_freelist_fini (nal, &ni->ni_free_msgs);
- lib_freelist_fini (nal, &ni->ni_free_mds);
- lib_freelist_fini (nal, &ni->ni_free_eqs);
--}
--
--#endif
--
--__u64
- lib_create_interface_cookie (nal_cb_t *nal)
-lib_create_interface_cookie (lib_nal_t *nal)
--{
-- /* NB the interface cookie in wire handles guards against delayed
-- * replies and ACKs appearing valid in a new instance of the same
-- * interface. Initialisation time, even if it's only implemented
-- * to millisecond resolution is probably easily good enough. */
-- struct timeval tv;
-- __u64 cookie;
--#ifndef __KERNEL__
-- int rc = gettimeofday (&tv, NULL);
-- LASSERT (rc == 0);
--#else
-- do_gettimeofday(&tv);
--#endif
-- cookie = tv.tv_sec;
-- cookie *= 1000000;
-- cookie += tv.tv_usec;
-- return (cookie);
--}
--
--int
- lib_setup_handle_hash (nal_cb_t *nal)
-lib_setup_handle_hash (lib_nal_t *nal)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- int i;
--
-- /* Arbitrary choice of hash table size */
--#ifdef __KERNEL__
-- ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head);
--#else
-- ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
--#endif
- ni->ni_lh_hash_table =
- (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size
- * sizeof (struct list_head));
- PORTAL_ALLOC(ni->ni_lh_hash_table,
- ni->ni_lh_hash_size * sizeof (struct list_head));
-- if (ni->ni_lh_hash_table == NULL)
- return (PTL_NOSPACE);
- return (PTL_NO_SPACE);
--
-- for (i = 0; i < ni->ni_lh_hash_size; i++)
-- INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]);
--
-- ni->ni_next_object_cookie = PTL_COOKIE_TYPES;
--
-- return (PTL_OK);
--}
--
--void
- lib_cleanup_handle_hash (nal_cb_t *nal)
-lib_cleanup_handle_hash (lib_nal_t *nal)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
--
-- if (ni->ni_lh_hash_table == NULL)
-- return;
--
- nal->cb_free (nal, ni->ni_lh_hash_table,
- ni->ni_lh_hash_size * sizeof (struct list_head));
- PORTAL_FREE(ni->ni_lh_hash_table,
- ni->ni_lh_hash_size * sizeof (struct list_head));
--}
--
--lib_handle_t *
- lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type)
-lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type)
--{
-- /* ALWAYS called with statelock held */
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- struct list_head *list;
-- struct list_head *el;
-- unsigned int hash;
--
-- if ((cookie & (PTL_COOKIE_TYPES - 1)) != type)
-- return (NULL);
--
-- hash = ((unsigned int)cookie) % ni->ni_lh_hash_size;
-- list = &ni->ni_lh_hash_table[hash];
--
-- list_for_each (el, list) {
-- lib_handle_t *lh = list_entry (el, lib_handle_t, lh_hash_chain);
--
-- if (lh->lh_cookie == cookie)
-- return (lh);
-- }
--
-- return (NULL);
--}
--
--void
- lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type)
-lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type)
--{
-- /* ALWAYS called with statelock held */
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- unsigned int hash;
--
-- LASSERT (type >= 0 && type < PTL_COOKIE_TYPES);
-- lh->lh_cookie = ni->ni_next_object_cookie | type;
-- ni->ni_next_object_cookie += PTL_COOKIE_TYPES;
--
-- hash = ((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size;
-- list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[hash]);
--}
--
--void
- lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh)
-lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh)
--{
-- list_del (&lh->lh_hash_chain);
--}
--
--int
- lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize,
- ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size)
-lib_init(lib_nal_t *libnal, nal_t *apinal,
- ptl_process_id_t process_id,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
-- int rc = PTL_OK;
- lib_ni_t *ni = &nal->ni;
- int i;
- lib_ni_t *ni = &libnal->libnal_ni;
- int ptl_size;
- int i;
-- ENTRY;
--
-- /* NB serialised in PtlNIInit() */
-
- if (ni->refcnt != 0) { /* already initialised */
- ni->refcnt++;
- goto out;
- }
--
-- lib_assert_wire_constants ();
-
- /*
- * Allocate the portal table for this interface
- * and all per-interface objects.
- */
- memset(&ni->counters, 0, sizeof(lib_counters_t));
--
- rc = kportal_descriptor_setup (nal);
- /* Setup the API nal with the lib API handling functions */
- apinal->nal_get_id = lib_api_get_id;
- apinal->nal_ni_status = lib_api_ni_status;
- apinal->nal_ni_dist = lib_api_ni_dist;
- apinal->nal_fail_nid = lib_api_fail_nid;
- apinal->nal_me_attach = lib_api_me_attach;
- apinal->nal_me_insert = lib_api_me_insert;
- apinal->nal_me_unlink = lib_api_me_unlink;
- apinal->nal_md_attach = lib_api_md_attach;
- apinal->nal_md_bind = lib_api_md_bind;
- apinal->nal_md_unlink = lib_api_md_unlink;
- apinal->nal_md_update = lib_api_md_update;
- apinal->nal_eq_alloc = lib_api_eq_alloc;
- apinal->nal_eq_free = lib_api_eq_free;
- apinal->nal_eq_poll = lib_api_eq_poll;
- apinal->nal_put = lib_api_put;
- apinal->nal_get = lib_api_get;
-
- apinal->nal_data = libnal;
- ni->ni_api = apinal;
-
- rc = kportal_descriptor_setup (libnal, requested_limits,
- &ni->ni_actual_limits);
-- if (rc != PTL_OK)
-- goto out;
-
- memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
--
-- INIT_LIST_HEAD (&ni->ni_active_msgs);
-- INIT_LIST_HEAD (&ni->ni_active_mds);
-- INIT_LIST_HEAD (&ni->ni_active_eqs);
-
-- INIT_LIST_HEAD (&ni->ni_test_peers);
--
- ni->ni_interface_cookie = lib_create_interface_cookie (nal);
-#ifdef __KERNEL__
- spin_lock_init (&ni->ni_lock);
- init_waitqueue_head (&ni->ni_waitq);
-#else
- pthread_mutex_init(&ni->ni_mutex, NULL);
- pthread_cond_init(&ni->ni_cond, NULL);
-#endif
-
- ni->ni_interface_cookie = lib_create_interface_cookie (libnal);
-- ni->ni_next_object_cookie = 0;
- rc = lib_setup_handle_hash (nal);
- rc = lib_setup_handle_hash (libnal);
-- if (rc != PTL_OK)
-- goto out;
--
- ni->nid = nid;
- ni->pid = pid;
- ni->ni_pid = process_id;
--
- ni->num_nodes = gsize;
- ni->tbl.size = ptl_size;
- if (requested_limits != NULL)
- ptl_size = requested_limits->max_pt_index + 1;
- else
- ptl_size = 64;
--
- ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size);
- if (ni->tbl.tbl == NULL) {
- rc = PTL_NOSPACE;
- ni->ni_portals.size = ptl_size;
- PORTAL_ALLOC(ni->ni_portals.tbl,
- ptl_size * sizeof(struct list_head));
- if (ni->ni_portals.tbl == NULL) {
- rc = PTL_NO_SPACE;
-- goto out;
-- }
--
-- for (i = 0; i < ptl_size; i++)
- INIT_LIST_HEAD(&(ni->tbl.tbl[i]));
- INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
--
- ni->debug = PTL_DEBUG_NONE;
- ni->up = 1;
- ni->refcnt++;
- /* max_{mes,mds,eqs} set in kportal_descriptor_setup */
-
- /* We don't have an access control table! */
- ni->ni_actual_limits.max_ac_index = -1;
-
- ni->ni_actual_limits.max_pt_index = ptl_size - 1;
- ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
- ni->ni_actual_limits.max_me_list = INT_MAX;
-
- /* We don't support PtlGetPut! */
- ni->ni_actual_limits.max_getput_md = 0;
-
- if (actual_limits != NULL)
- *actual_limits = ni->ni_actual_limits;
--
-- out:
-- if (rc != PTL_OK) {
- lib_cleanup_handle_hash (nal);
- kportal_descriptor_cleanup (nal);
- lib_cleanup_handle_hash (libnal);
- kportal_descriptor_cleanup (libnal);
-- }
--
-- RETURN (rc);
--}
--
--int
- lib_fini(nal_cb_t * nal)
-lib_fini(lib_nal_t *nal)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- int idx;
-
- ni->refcnt--;
-
- if (ni->refcnt != 0)
- goto out;
--
- /* NB no stat_lock() since this is the last reference. The NAL
- /* NB no state_lock() since this is the last reference. The NAL
-- * should have shut down already, so it should be safe to unlink
-- * and free all descriptors, even those that appear committed to a
-- * network op (eg MD with non-zero pending count)
-- */
--
- for (idx = 0; idx < ni->tbl.size; idx++)
- while (!list_empty (&ni->tbl.tbl[idx])) {
- lib_me_t *me = list_entry (ni->tbl.tbl[idx].next,
- for (idx = 0; idx < ni->ni_portals.size; idx++)
- while (!list_empty (&ni->ni_portals.tbl[idx])) {
- lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next,
-- lib_me_t, me_list);
--
-- CERROR ("Active me %p on exit\n", me);
-- list_del (&me->me_list);
-- lib_me_free (nal, me);
-- }
--
-- while (!list_empty (&ni->ni_active_mds)) {
-- lib_md_t *md = list_entry (ni->ni_active_mds.next,
-- lib_md_t, md_list);
--
-- CERROR ("Active md %p on exit\n", md);
-- list_del (&md->md_list);
-- lib_md_free (nal, md);
-- }
--
-- while (!list_empty (&ni->ni_active_eqs)) {
-- lib_eq_t *eq = list_entry (ni->ni_active_eqs.next,
-- lib_eq_t, eq_list);
--
-- CERROR ("Active eq %p on exit\n", eq);
-- list_del (&eq->eq_list);
-- lib_eq_free (nal, eq);
-- }
--
-- while (!list_empty (&ni->ni_active_msgs)) {
-- lib_msg_t *msg = list_entry (ni->ni_active_msgs.next,
-- lib_msg_t, msg_list);
--
-- CERROR ("Active msg %p on exit\n", msg);
-- list_del (&msg->msg_list);
-- lib_msg_free (nal, msg);
-- }
--
- nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size);
- ni->up = 0;
- PORTAL_FREE(ni->ni_portals.tbl,
- ni->ni_portals.size * sizeof(struct list_head));
--
-- lib_cleanup_handle_hash (nal);
-- kportal_descriptor_cleanup (nal);
--
- out:
-#ifndef __KERNEL__
- pthread_mutex_destroy(&ni->ni_mutex);
- pthread_cond_destroy(&ni->ni_cond);
-#endif
-
-- return (PTL_OK);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-md.c
-- * Memory Descriptor management routines
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef __KERNEL__
--# include <stdio.h>
--#else
--# define DEBUG_SUBSYSTEM S_PORTALS
--# include <linux/kp30.h>
--#endif
--
--#include <portals/lib-p30.h>
- #include <portals/arg-blocks.h>
--
- /*
- * must be called with state lock held
- */
- void lib_md_unlink(nal_cb_t * nal, lib_md_t * md)
-/* must be called with state lock held */
-void
-lib_md_unlink(lib_nal_t *nal, lib_md_t *md)
--{
- lib_me_t *me = md->me;
- if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
- /* first unlink attempt... */
- lib_me_t *me = md->me;
-
- md->md_flags |= PTL_MD_FLAG_ZOMBIE;
-
- /* Disassociate from ME (if any), and unlink it if it was created
- * with PTL_UNLINK */
- if (me != NULL) {
- me->md = NULL;
- if (me->unlink == PTL_UNLINK)
- lib_me_unlink(nal, me);
- }
-
- /* emsure all future handle lookups fail */
- lib_invalidate_handle(nal, &md->md_lh);
- }
--
-- if (md->pending != 0) {
-- CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
- md->md_flags |= PTL_MD_FLAG_UNLINK;
-- return;
-- }
--
-- CDEBUG(D_NET, "Unlinking md %p\n", md);
--
-- if ((md->options & PTL_MD_KIOV) != 0) {
- if (nal->cb_unmap_pages != NULL)
- nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov,
- &md->md_addrkey);
- } else if (nal->cb_unmap != NULL)
- nal->cb_unmap (nal, md->md_niov, md->md_iov.iov,
- &md->md_addrkey);
-
- if (me) {
- me->md = NULL;
- if (me->unlink == PTL_UNLINK)
- lib_me_unlink(nal, me);
- if (nal->libnal_unmap_pages != NULL)
- nal->libnal_unmap_pages (nal,
- md->md_niov,
- md->md_iov.kiov,
- &md->md_addrkey);
- } else if (nal->libnal_unmap != NULL) {
- nal->libnal_unmap (nal,
- md->md_niov, md->md_iov.iov,
- &md->md_addrkey);
-- }
--
- if (md->eq != NULL)
- {
- if (md->eq != NULL) {
-- md->eq->eq_refcount--;
-- LASSERT (md->eq->eq_refcount >= 0);
-- }
--
- lib_invalidate_handle (nal, &md->md_lh);
-- list_del (&md->md_list);
-- lib_md_free(nal, md);
--}
--
--/* must be called with state lock held */
- static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private,
- ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink)
-static int
-lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink)
--{
- const int max_size_opts = PTL_MD_AUTO_UNLINK |
- PTL_MD_MAX_SIZE;
-- lib_eq_t *eq = NULL;
-- int rc;
-- int i;
- int niov;
- int total_length = 0;
--
-- /* NB we are passed an allocated, but uninitialised/active md.
-- * if we return success, caller may lib_md_unlink() it.
-- * otherwise caller may only lib_md_free() it.
-- */
--
- if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) {
- eq = ptl_handle2eq(eqh, nal);
- if (!PtlHandleIsEqual (umd->eq_handle, PTL_EQ_NONE)) {
- eq = ptl_handle2eq(&umd->eq_handle, nal);
-- if (eq == NULL)
- return PTL_INV_EQ;
- return PTL_EQ_INVALID;
-- }
-
- /* Must check this _before_ allocation. Also, note that non-iov
- * MDs must set md_niov to 0. */
- LASSERT((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0 ||
- md->niov <= PTL_MD_MAX_IOV);
--
- if ((md->options & max_size_opts) != 0 && /* max size used */
- (md->max_size < 0 || md->max_size > md->length)) // illegal max_size
- return PTL_INV_MD;
- /* This implementation doesn't know how to create START events or
- * disable END events. Best to LASSERT our caller is compliant so
- * we find out quickly... */
- LASSERT (eq == NULL ||
- ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
- (umd->options & PTL_MD_EVENT_END_DISABLE) == 0));
--
- new->me = NULL;
- new->start = md->start;
- new->length = md->length;
- new->offset = 0;
- new->max_size = md->max_size;
- new->unlink = unlink;
- new->options = md->options;
- new->user_ptr = md->user_ptr;
- new->eq = eq;
- new->threshold = md->threshold;
- new->pending = 0;
- new->md_flags = 0;
- lmd->me = NULL;
- lmd->start = umd->start;
- lmd->offset = 0;
- lmd->max_size = umd->max_size;
- lmd->options = umd->options;
- lmd->user_ptr = umd->user_ptr;
- lmd->eq = eq;
- lmd->threshold = umd->threshold;
- lmd->pending = 0;
- lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
--
- if ((md->options & PTL_MD_IOV) != 0) {
- int total_length = 0;
- if ((umd->options & PTL_MD_IOVEC) != 0) {
--
- if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_INV_MD;
- if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
- return PTL_MD_ILLEGAL;
--
- new->md_niov = md->niov;
-
- if (nal->cb_read (nal, private, new->md_iov.iov, md->start,
- md->niov * sizeof (new->md_iov.iov[0])))
- return PTL_SEGV;
- lmd->md_niov = niov = umd->length;
- memcpy(lmd->md_iov.iov, umd->start,
- niov * sizeof (lmd->md_iov.iov[0]));
--
- for (i = 0; i < new->md_niov; i++) {
- for (i = 0; i < niov; i++) {
-- /* We take the base address on trust */
- if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */
- return PTL_VAL_FAILED;
- if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
- return PTL_MD_ILLEGAL;
--
- total_length += new->md_iov.iov[i].iov_len;
- total_length += lmd->md_iov.iov[i].iov_len;
-- }
--
- if (md->length > total_length)
- return PTL_IOV_TOO_SMALL;
-
- if (nal->cb_map != NULL) {
- rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov,
- &new->md_addrkey);
- lmd->length = total_length;
-
- if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) // illegal max_size
- return PTL_MD_ILLEGAL;
-
- if (nal->libnal_map != NULL) {
- rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
- &lmd->md_addrkey);
-- if (rc != PTL_OK)
-- return (rc);
-- }
- } else if ((md->options & PTL_MD_KIOV) != 0) {
- } else if ((umd->options & PTL_MD_KIOV) != 0) {
--#ifndef __KERNEL__
- return PTL_INV_MD;
- #else
- int total_length = 0;
-
- return PTL_MD_ILLEGAL;
-#else
-- /* Trap attempt to use paged I/O if unsupported early. */
- if (nal->cb_send_pages == NULL ||
- nal->cb_recv_pages == NULL)
- return PTL_INV_MD;
- if (nal->libnal_send_pages == NULL ||
- nal->libnal_recv_pages == NULL)
- return PTL_MD_INVALID;
--
- new->md_niov = md->niov;
- lmd->md_niov = niov = umd->length;
- memcpy(lmd->md_iov.kiov, umd->start,
- niov * sizeof (lmd->md_iov.kiov[0]));
--
- if (nal->cb_read (nal, private, new->md_iov.kiov, md->start,
- md->niov * sizeof (new->md_iov.kiov[0])))
- return PTL_SEGV;
-
- for (i = 0; i < new->md_niov; i++) {
- for (i = 0; i < niov; i++) {
-- /* We take the page pointer on trust */
- if (new->md_iov.kiov[i].kiov_offset +
- new->md_iov.kiov[i].kiov_len > PAGE_SIZE )
- if (lmd->md_iov.kiov[i].kiov_offset +
- lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
-- return PTL_VAL_FAILED; /* invalid length */
--
- total_length += new->md_iov.kiov[i].kiov_len;
- total_length += lmd->md_iov.kiov[i].kiov_len;
-- }
--
- if (md->length > total_length)
- return PTL_IOV_TOO_SMALL;
- lmd->length = total_length;
--
- if (nal->cb_map_pages != NULL) {
- rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov,
- &new->md_addrkey);
- if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > total_length)) // illegal max_size
- return PTL_MD_ILLEGAL;
-
- if (nal->libnal_map_pages != NULL) {
- rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov,
- &lmd->md_addrkey);
-- if (rc != PTL_OK)
-- return (rc);
-- }
--#endif
-- } else { /* contiguous */
- new->md_niov = 1;
- new->md_iov.iov[0].iov_base = md->start;
- new->md_iov.iov[0].iov_len = md->length;
- lmd->length = umd->length;
- lmd->md_niov = niov = 1;
- lmd->md_iov.iov[0].iov_base = umd->start;
- lmd->md_iov.iov[0].iov_len = umd->length;
--
- if (nal->cb_map != NULL) {
- rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov,
- &new->md_addrkey);
- if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
- (umd->max_size < 0 ||
- umd->max_size > umd->length)) // illegal max_size
- return PTL_MD_ILLEGAL;
-
- if (nal->libnal_map != NULL) {
- rc = nal->libnal_map (nal, niov, lmd->md_iov.iov,
- &lmd->md_addrkey);
-- if (rc != PTL_OK)
-- return (rc);
-- }
-- }
--
-- if (eq != NULL)
-- eq->eq_refcount++;
--
-- /* It's good; let handle2md succeed and add to active mds */
- lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD);
- list_add (&new->md_list, &nal->ni.ni_active_mds);
- lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD);
- list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds);
--
-- return PTL_OK;
--}
--
--/* must be called with state lock held */
- void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new)
-void
-lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd)
--{
-- /* NB this doesn't copy out all the iov entries so when a
-- * discontiguous MD is copied out, the target gets to know the
-- * original iov pointer (in start) and the number of entries it had
-- * and that's all.
-- */
- new->start = md->start;
- new->length = md->length;
- new->threshold = md->threshold;
- new->max_size = md->max_size;
- new->options = md->options;
- new->user_ptr = md->user_ptr;
- ptl_eq2handle(&new->eventq, md->eq);
- new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 0 : md->md_niov;
- umd->start = lmd->start;
- umd->length = ((lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) == 0) ?
- lmd->length : lmd->md_niov;
- umd->threshold = lmd->threshold;
- umd->max_size = lmd->max_size;
- umd->options = lmd->options;
- umd->user_ptr = lmd->user_ptr;
- ptl_eq2handle(&umd->eq_handle, nal, lmd->eq);
--}
--
- int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int
-lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
- ptl_md_t *umd, ptl_unlink_t unlink,
- ptl_handle_md_t *handle)
--{
- /*
- * Incoming:
- * ptl_handle_me_t current_in
- * ptl_md_t md_in
- * ptl_unlink_t unlink_in
- *
- * Outgoing:
- * ptl_handle_md_t * handle_out
- */
-
- PtlMDAttach_in *args = v_args;
- PtlMDAttach_out *ret = v_ret;
- lib_me_t *me;
- lib_md_t *md;
- lib_nal_t *nal = apinal->nal_data;
- lib_me_t *me;
- lib_md_t *md;
-- unsigned long flags;
- int rc;
--
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
- args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
- return (ret->rc = PTL_IOV_TOO_MANY);
- if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
- umd->length > PTL_MD_MAX_IOV) /* too many fragments */
- return PTL_IOV_INVALID;
--
- md = lib_md_alloc(nal, &args->md_in);
- md = lib_md_alloc(nal, umd);
-- if (md == NULL)
- return (ret->rc = PTL_NOSPACE);
- return PTL_NO_SPACE;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- me = ptl_handle2me(&args->me_in, nal);
- me = ptl_handle2me(meh, nal);
-- if (me == NULL) {
- ret->rc = PTL_INV_ME;
- rc = PTL_ME_INVALID;
-- } else if (me->md != NULL) {
- ret->rc = PTL_INUSE;
- rc = PTL_ME_IN_USE;
-- } else {
- ret->rc = lib_md_build(nal, md, private, &args->md_in,
- &args->eq_in, args->unlink_in);
-
- if (ret->rc == PTL_OK) {
- rc = lib_md_build(nal, md, umd, unlink);
- if (rc == PTL_OK) {
-- me->md = md;
-- md->me = me;
--
- ptl_md2handle(&ret->handle_out, md);
- ptl_md2handle(handle, nal, md);
--
- state_unlock (nal, &flags);
- LIB_UNLOCK(nal, flags);
-- return (PTL_OK);
-- }
-- }
--
-- lib_md_free (nal, md);
--
- state_unlock (nal, &flags);
- return (ret->rc);
- LIB_UNLOCK(nal, flags);
- return (rc);
--}
--
- int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int
-lib_api_md_bind(nal_t *apinal,
- ptl_md_t *umd, ptl_unlink_t unlink,
- ptl_handle_md_t *handle)
--{
- /*
- * Incoming:
- * ptl_handle_ni_t ni_in
- * ptl_md_t md_in
- *
- * Outgoing:
- * ptl_handle_md_t * handle_out
- */
-
- PtlMDBind_in *args = v_args;
- PtlMDBind_out *ret = v_ret;
- lib_md_t *md;
- lib_nal_t *nal = apinal->nal_data;
- lib_md_t *md;
-- unsigned long flags;
- int rc;
--
- if ((args->md_in.options & (PTL_MD_KIOV | PTL_MD_IOV)) != 0 &&
- args->md_in.niov > PTL_MD_MAX_IOV) /* too many fragments */
- return (ret->rc = PTL_IOV_TOO_MANY);
- if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0 &&
- umd->length > PTL_MD_MAX_IOV) /* too many fragments */
- return PTL_IOV_INVALID;
--
- md = lib_md_alloc(nal, &args->md_in);
- md = lib_md_alloc(nal, umd);
-- if (md == NULL)
- return (ret->rc = PTL_NOSPACE);
- return PTL_NO_SPACE;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- ret->rc = lib_md_build(nal, md, private,
- &args->md_in, &args->eq_in, PTL_UNLINK);
- rc = lib_md_build(nal, md, umd, unlink);
--
- if (ret->rc == PTL_OK) {
- ptl_md2handle(&ret->handle_out, md);
- if (rc == PTL_OK) {
- ptl_md2handle(handle, nal, md);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
-- return (PTL_OK);
-- }
--
-- lib_md_free (nal, md);
--
- state_unlock(nal, &flags);
- return (ret->rc);
- LIB_UNLOCK(nal, flags);
- return (rc);
--}
--
- int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int
-lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh)
--{
- PtlMDUnlink_in *args = v_args;
- PtlMDUnlink_out *ret = v_ret;
- lib_nal_t *nal = apinal->nal_data;
-- ptl_event_t ev;
-- lib_md_t *md;
-- unsigned long flags;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- md = ptl_handle2md(&args->md_in, nal);
- md = ptl_handle2md(mdh, nal);
-- if (md == NULL) {
- state_unlock(nal, &flags);
- return (ret->rc = PTL_INV_MD);
- LIB_UNLOCK(nal, flags);
- return PTL_MD_INVALID;
-- }
--
-- /* If the MD is busy, lib_md_unlink just marks it for deletion, and
-- * when the NAL is done, the completion event flags that the MD was
-- * unlinked. Otherwise, we enqueue an event now... */
--
-- if (md->eq != NULL &&
-- md->pending == 0) {
-- memset(&ev, 0, sizeof(ev));
--
-- ev.type = PTL_EVENT_UNLINK;
- ev.status = PTL_OK;
- ev.ni_fail_type = PTL_OK;
-- ev.unlinked = 1;
- lib_md_deconstruct(nal, md, &ev.mem_desc);
- lib_md_deconstruct(nal, md, &ev.md);
- ptl_md2handle(&ev.md_handle, nal, md);
--
- lib_enq_event_locked(nal, private, md->eq, &ev);
- lib_enq_event_locked(nal, NULL, md->eq, &ev);
-- }
--
- lib_md_deconstruct(nal, md, &ret->status_out);
-- lib_md_unlink(nal, md);
- ret->rc = PTL_OK;
-
- state_unlock(nal, &flags);
--
- return (PTL_OK);
- LIB_UNLOCK(nal, flags);
- return PTL_OK;
--}
--
- int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args,
- void *v_ret)
-int
-lib_api_md_update (nal_t *apinal,
- ptl_handle_md_t *mdh,
- ptl_md_t *oldumd, ptl_md_t *newumd,
- ptl_handle_eq_t *testqh)
--{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_md_t * old_inout
- * ptl_md_t * new_inout
- * ptl_handle_eq_t testq_in
- * ptl_seq_t sequence_in
- *
- * Outgoing:
- * ptl_md_t * old_inout
- * ptl_md_t * new_inout
- */
- PtlMDUpdate_internal_in *args = v_args;
- PtlMDUpdate_internal_out *ret = v_ret;
- lib_md_t *md;
- lib_eq_t *test_eq = NULL;
- ptl_md_t *new = &args->new_inout;
- lib_nal_t *nal = apinal->nal_data;
- lib_md_t *md;
- lib_eq_t *test_eq = NULL;
-- unsigned long flags;
- int rc;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- md = ptl_handle2md(&args->md_in, nal);
- md = ptl_handle2md(mdh, nal);
-- if (md == NULL) {
- ret->rc = PTL_INV_MD;
- rc = PTL_MD_INVALID;
-- goto out;
-- }
-
- if (args->old_inout_valid)
- lib_md_deconstruct(nal, md, &ret->old_inout);
--
- if (!args->new_inout_valid) {
- ret->rc = PTL_OK;
- goto out;
- }
- if (oldumd != NULL)
- lib_md_deconstruct(nal, md, oldumd);
--
- /* XXX fttb, the new MD must be the same type wrt fragmentation */
- if (((new->options ^ md->options) &
- (PTL_MD_IOV | PTL_MD_KIOV)) != 0) {
- ret->rc = PTL_INV_MD;
- if (newumd == NULL) {
- rc = PTL_OK;
-- goto out;
-- }
--
- if (new->niov > md->md_niov) {
- ret->rc = PTL_IOV_TOO_MANY;
- /* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
- * since we simply overwrite the old lib-md */
- if ((((newumd->options ^ md->options) &
- (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
- ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 &&
- newumd->length != md->md_niov)) {
- rc = PTL_IOV_INVALID;
-- goto out;
-- }
-
- if (new->niov < md->md_niov) {
- ret->rc = PTL_IOV_TOO_SMALL;
- goto out;
- }
--
- if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) {
- test_eq = ptl_handle2eq(&args->testq_in, nal);
- if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
- test_eq = ptl_handle2eq(testqh, nal);
-- if (test_eq == NULL) {
- ret->rc = PTL_INV_EQ;
- rc = PTL_EQ_INVALID;
-- goto out;
-- }
-- }
--
-- if (md->pending != 0) {
- ret->rc = PTL_NOUPDATE;
- goto out;
- rc = PTL_MD_NO_UPDATE;
- goto out;
-- }
--
-- if (test_eq == NULL ||
- test_eq->sequence == args->sequence_in) {
- test_eq->eq_deq_seq == test_eq->eq_enq_seq) {
-- lib_me_t *me = md->me;
- int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
- PTL_UNLINK : PTL_RETAIN;
--
-- // #warning this does not track eq refcounts properly
- ret->rc = lib_md_build(nal, md, private,
- new, &new->eventq, md->unlink);
- rc = lib_md_build(nal, md, newumd, unlink);
--
-- md->me = me;
-- } else {
- ret->rc = PTL_NOUPDATE;
- rc = PTL_MD_NO_UPDATE;
-- }
--
-- out:
- state_unlock(nal, &flags);
- return (ret->rc);
- LIB_UNLOCK(nal, flags);
-
- return rc;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-me.c
-- * Match Entry management routines
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef __KERNEL__
--# include <stdio.h>
--#else
--# define DEBUG_SUBSYSTEM S_PORTALS
--# include <linux/kp30.h>
--#endif
--
--#include <portals/lib-p30.h>
- #include <portals/arg-blocks.h>
-
- static void lib_me_dump(nal_cb_t * nal, lib_me_t * me);
--
- int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int
-lib_api_me_attach(nal_t *apinal,
- ptl_pt_index_t portal,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
- ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle)
--{
- PtlMEAttach_in *args = v_args;
- PtlMEAttach_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_ptl_t *tbl = &ni->tbl;
- lib_nal_t *nal = apinal->nal_data;
- lib_ni_t *ni = &nal->libnal_ni;
- lib_ptl_t *tbl = &ni->ni_portals;
- lib_me_t *me;
-- unsigned long flags;
- lib_me_t *me;
--
- if (args->index_in >= tbl->size)
- return ret->rc = PTL_INV_PTINDEX;
- if (portal >= tbl->size)
- return PTL_PT_INDEX_INVALID;
--
-- /* Should check for valid matchid, but not yet */
- if (0)
- return ret->rc = PTL_INV_PROC;
--
-- me = lib_me_alloc (nal);
-- if (me == NULL)
- return (ret->rc = PTL_NOSPACE);
- return PTL_NO_SPACE;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- me->match_id = args->match_id_in;
- me->match_bits = args->match_bits_in;
- me->ignore_bits = args->ignore_bits_in;
- me->unlink = args->unlink_in;
- me->match_id = match_id;
- me->match_bits = match_bits;
- me->ignore_bits = ignore_bits;
- me->unlink = unlink;
-- me->md = NULL;
--
-- lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME);
--
- if (args->position_in == PTL_INS_AFTER)
- list_add_tail(&me->me_list, &(tbl->tbl[args->index_in]));
- if (pos == PTL_INS_AFTER)
- list_add_tail(&me->me_list, &(tbl->tbl[portal]));
-- else
- list_add(&me->me_list, &(tbl->tbl[args->index_in]));
- list_add(&me->me_list, &(tbl->tbl[portal]));
--
- ptl_me2handle(&ret->handle_out, me);
- ptl_me2handle(handle, nal, me);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- return ret->rc = PTL_OK;
- return PTL_OK;
--}
--
- int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int
-lib_api_me_insert(nal_t *apinal,
- ptl_handle_me_t *current_meh,
- ptl_process_id_t match_id,
- ptl_match_bits_t match_bits,
- ptl_match_bits_t ignore_bits,
- ptl_unlink_t unlink, ptl_ins_pos_t pos,
- ptl_handle_me_t *handle)
--{
- PtlMEInsert_in *args = v_args;
- PtlMEInsert_out *ret = v_ret;
- lib_nal_t *nal = apinal->nal_data;
- lib_me_t *current_me;
- lib_me_t *new_me;
-- unsigned long flags;
- lib_me_t *me;
- lib_me_t *new;
--
- new = lib_me_alloc (nal);
- if (new == NULL)
- return (ret->rc = PTL_NOSPACE);
- new_me = lib_me_alloc (nal);
- if (new_me == NULL)
- return PTL_NO_SPACE;
--
-- /* Should check for valid matchid, but not yet */
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- me = ptl_handle2me(&args->current_in, nal);
- if (me == NULL) {
- lib_me_free (nal, new);
- current_me = ptl_handle2me(current_meh, nal);
- if (current_me == NULL) {
- lib_me_free (nal, new_me);
--
- state_unlock (nal, &flags);
- return (ret->rc = PTL_INV_ME);
- LIB_UNLOCK(nal, flags);
- return PTL_ME_INVALID;
-- }
--
- new->match_id = args->match_id_in;
- new->match_bits = args->match_bits_in;
- new->ignore_bits = args->ignore_bits_in;
- new->unlink = args->unlink_in;
- new->md = NULL;
- new_me->match_id = match_id;
- new_me->match_bits = match_bits;
- new_me->ignore_bits = ignore_bits;
- new_me->unlink = unlink;
- new_me->md = NULL;
--
- lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME);
- lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME);
--
- if (args->position_in == PTL_INS_AFTER)
- list_add_tail(&new->me_list, &me->me_list);
- if (pos == PTL_INS_AFTER)
- list_add_tail(&new_me->me_list, ¤t_me->me_list);
-- else
- list_add(&new->me_list, &me->me_list);
- list_add(&new_me->me_list, ¤t_me->me_list);
--
- ptl_me2handle(&ret->handle_out, new);
- ptl_me2handle(handle, nal, new_me);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- return ret->rc = PTL_OK;
- return PTL_OK;
--}
--
- int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int
-lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh)
--{
- PtlMEUnlink_in *args = v_args;
- PtlMEUnlink_out *ret = v_ret;
- lib_nal_t *nal = apinal->nal_data;
-- unsigned long flags;
- lib_me_t *me;
- lib_me_t *me;
- int rc;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- me = ptl_handle2me(&args->current_in, nal);
- me = ptl_handle2me(meh, nal);
-- if (me == NULL) {
- ret->rc = PTL_INV_ME;
- rc = PTL_ME_INVALID;
-- } else {
-- lib_me_unlink(nal, me);
- ret->rc = PTL_OK;
- rc = PTL_OK;
-- }
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- return (ret->rc);
- return (rc);
--}
--
--/* call with state_lock please */
- void lib_me_unlink(nal_cb_t *nal, lib_me_t *me)
-void
-lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
--{
- lib_ni_t *ni = &nal->ni;
-
- if (ni->debug & PTL_DEBUG_UNLINK) {
- ptl_handle_any_t handle;
- ptl_me2handle(&handle, me);
- }
-
-- list_del (&me->me_list);
--
-- if (me->md) {
-- me->md->me = NULL;
-- lib_md_unlink(nal, me->md);
-- }
--
-- lib_invalidate_handle (nal, &me->me_lh);
-- lib_me_free(nal, me);
- }
-
- int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
- {
- PtlTblDump_in *args = v_args;
- PtlTblDump_out *ret = v_ret;
- lib_ptl_t *tbl = &nal->ni.tbl;
- ptl_handle_any_t handle;
- struct list_head *tmp;
- unsigned long flags;
-
- if (args->index_in < 0 || args->index_in >= tbl->size)
- return ret->rc = PTL_INV_PTINDEX;
-
- nal->cb_printf(nal, "Portal table index %d\n", args->index_in);
-
- state_lock(nal, &flags);
- list_for_each(tmp, &(tbl->tbl[args->index_in])) {
- lib_me_t *me = list_entry(tmp, lib_me_t, me_list);
- ptl_me2handle(&handle, me);
- lib_me_dump(nal, me);
- }
- state_unlock(nal, &flags);
-
- return ret->rc = PTL_OK;
- }
-
- int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
- {
- PtlMEDump_in *args = v_args;
- PtlMEDump_out *ret = v_ret;
- lib_me_t *me;
- unsigned long flags;
-
- state_lock(nal, &flags);
-
- me = ptl_handle2me(&args->current_in, nal);
- if (me == NULL) {
- ret->rc = PTL_INV_ME;
- } else {
- lib_me_dump(nal, me);
- ret->rc = PTL_OK;
- }
-
- state_unlock(nal, &flags);
-
- return ret->rc;
--}
--
- static void lib_me_dump(nal_cb_t * nal, lib_me_t * me)
-#if 0
-static void
-lib_me_dump(lib_nal_t *nal, lib_me_t * me)
--{
- nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me,
- me->me_lh.lh_cookie);
- CWARN("Match Entry %p ("LPX64")\n", me,
- me->me_lh.lh_cookie);
--
- nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n",
- me->match_bits, me->ignore_bits);
- CWARN("\tMatch/Ignore\t= %016lx / %016lx\n",
- me->match_bits, me->ignore_bits);
--
- nal->cb_printf(nal, "\tMD\t= %p\n", me->md);
- nal->cb_printf(nal, "\tprev\t= %p\n",
- list_entry(me->me_list.prev, lib_me_t, me_list));
- nal->cb_printf(nal, "\tnext\t= %p\n",
- list_entry(me->me_list.next, lib_me_t, me_list));
- CWARN("\tMD\t= %p\n", me->md);
- CWARN("\tprev\t= %p\n",
- list_entry(me->me_list.prev, lib_me_t, me_list));
- CWARN("\tnext\t= %p\n",
- list_entry(me->me_list.next, lib_me_t, me_list));
--}
-#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-move.c
-- * Data movement routines
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef __KERNEL__
--# include <stdio.h>
--#else
--# define DEBUG_SUBSYSTEM S_PORTALS
--# include <linux/kp30.h>
--#endif
--#include <portals/p30.h>
--#include <portals/lib-p30.h>
- #include <portals/arg-blocks.h>
--
- /*
- * Right now it does not check access control lists.
- *
- * We only support one MD per ME, which is how the Portals 3.1 spec is written.
- * All previous complication is removed.
- */
-/* forward ref */
-static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
--
- static lib_me_t *
- lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid,
- ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset,
- ptl_match_bits_t match_bits, ptl_size_t *mlength_out,
- ptl_size_t *offset_out, int *unlink_out)
-static lib_md_t *
-lib_match_md(lib_nal_t *nal, int index, int op_mask,
- ptl_nid_t src_nid, ptl_pid_t src_pid,
- ptl_size_t rlength, ptl_size_t roffset,
- ptl_match_bits_t match_bits, lib_msg_t *msg,
- ptl_size_t *mlength_out, ptl_size_t *offset_out)
--{
- lib_ni_t *ni = &nal->ni;
- struct list_head *match_list = &ni->tbl.tbl[index];
- lib_ni_t *ni = &nal->libnal_ni;
- struct list_head *match_list = &ni->ni_portals.tbl[index];
-- struct list_head *tmp;
-- lib_me_t *me;
-- lib_md_t *md;
-- ptl_size_t mlength;
-- ptl_size_t offset;
-
-- ENTRY;
--
-- CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
-- "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
--
- if (index < 0 || index >= ni->tbl.size) {
- if (index < 0 || index >= ni->ni_portals.size) {
-- CERROR("Invalid portal %d not in [0-%d]\n",
- index, ni->tbl.size);
- index, ni->ni_portals.size);
-- goto failed;
-- }
--
-- list_for_each (tmp, match_list) {
-- me = list_entry(tmp, lib_me_t, me_list);
-- md = me->md;
--
-- /* ME attached but MD not attached yet */
-- if (md == NULL)
-- continue;
--
-- LASSERT (me == md->me);
-
- /* MD deactivated */
- if (md->threshold == 0)
- continue;
--
-- /* mismatched MD op */
-- if ((md->options & op_mask) == 0)
- continue;
-
- /* MD exhausted */
- if (lib_md_exhausted(md))
-- continue;
--
-- /* mismatched ME nid/pid? */
-- if (me->match_id.nid != PTL_NID_ANY &&
-- me->match_id.nid != src_nid)
-- continue;
-
- CDEBUG(D_NET,"match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid);
--
-- if (me->match_id.pid != PTL_PID_ANY &&
-- me->match_id.pid != src_pid)
-- continue;
--
-- /* mismatched ME matchbits? */
-- if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0)
-- continue;
--
-- /* Hurrah! This _is_ a match; check it out... */
--
-- if ((md->options & PTL_MD_MANAGE_REMOTE) == 0)
-- offset = md->offset;
-- else
-- offset = roffset;
--
- mlength = md->length - offset;
- if ((md->options & PTL_MD_MAX_SIZE) != 0 &&
- mlength > md->max_size)
- if ((md->options & PTL_MD_MAX_SIZE) != 0) {
-- mlength = md->max_size;
- LASSERT (md->offset + mlength <= md->length);
- } else {
- mlength = md->length - offset;
- }
--
-- if (rlength <= mlength) { /* fits in allowed space */
-- mlength = rlength;
-- } else if ((md->options & PTL_MD_TRUNCATE) == 0) {
-- /* this packet _really_ is too big */
-- CERROR("Matching packet %d too big: %d left, "
-- "%d allowed\n", rlength, md->length - offset,
-- mlength);
-- goto failed;
-- }
-
- /* Commit to this ME/MD */
- CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
- "length %d/%d into md "LPX64" [%d] + %d\n",
- (op_mask == PTL_MD_OP_PUT) ? "put" : "get",
- index, src_nid, src_pid, mlength, rlength,
- md->md_lh.lh_cookie, md->md_niov, offset);
--
- lib_commit_md(nal, md, msg);
-- md->offset = offset + mlength;
-
- /* NB Caller sets ev.type and ev.hdr_data */
- msg->ev.initiator.nid = src_nid;
- msg->ev.initiator.pid = src_pid;
- msg->ev.pt_index = index;
- msg->ev.match_bits = match_bits;
- msg->ev.rlength = rlength;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
-
- lib_md_deconstruct(nal, md, &msg->ev.md);
- ptl_md2handle(&msg->ev.md_handle, nal, md);
--
-- *offset_out = offset;
-- *mlength_out = mlength;
- *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 &&
- md->offset >= (md->length - md->max_size));
- RETURN (me);
-
- /* Auto-unlink NOW, so the ME gets unlinked if required.
- * We bumped md->pending above so the MD just gets flagged
- * for unlink when it is finalized. */
- if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 &&
- lib_md_exhausted(md))
- lib_md_unlink(nal, md);
-
- RETURN (md);
-- }
--
-- failed:
-- CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
-- " offset %d length %d: no match\n",
- ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
- ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
-- src_nid, src_pid, index, match_bits, roffset, rlength);
-- RETURN(NULL);
--}
--
- int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret)
-int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold)
--{
- PtlFailNid_in *args = v_args;
- PtlFailNid_out *ret = v_ret;
- lib_nal_t *nal = apinal->nal_data;
-- lib_test_peer_t *tp;
-- unsigned long flags;
-- struct list_head *el;
-- struct list_head *next;
-- struct list_head cull;
--
- if (args->threshold != 0) {
- if (threshold != 0) {
-- /* Adding a new entry */
- tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp));
- PORTAL_ALLOC(tp, sizeof(*tp));
-- if (tp == NULL)
- return (ret->rc = PTL_FAIL);
- return PTL_NO_SPACE;
--
- tp->tp_nid = args->nid;
- tp->tp_threshold = args->threshold;
- tp->tp_nid = nid;
- tp->tp_threshold = threshold;
--
- state_lock (nal, &flags);
- list_add (&tp->tp_list, &nal->ni.ni_test_peers);
- state_unlock (nal, &flags);
- return (ret->rc = PTL_OK);
- LIB_LOCK(nal, flags);
- list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
- LIB_UNLOCK(nal, flags);
- return PTL_OK;
-- }
--
-- /* removing entries */
-- INIT_LIST_HEAD (&cull);
--
- state_lock (nal, &flags);
- LIB_LOCK(nal, flags);
--
- list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
- list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
-- tp = list_entry (el, lib_test_peer_t, tp_list);
--
-- if (tp->tp_threshold == 0 || /* needs culling anyway */
- args->nid == PTL_NID_ANY || /* removing all entries */
- tp->tp_nid == args->nid) /* matched this one */
- nid == PTL_NID_ANY || /* removing all entries */
- tp->tp_nid == nid) /* matched this one */
-- {
-- list_del (&tp->tp_list);
-- list_add (&tp->tp_list, &cull);
-- }
-- }
--
- state_unlock (nal, &flags);
- LIB_UNLOCK(nal, flags);
--
-- while (!list_empty (&cull)) {
-- tp = list_entry (cull.next, lib_test_peer_t, tp_list);
--
-- list_del (&tp->tp_list);
- nal->cb_free (nal, tp, sizeof (*tp));
- PORTAL_FREE(tp, sizeof (*tp));
-- }
- return (ret->rc = PTL_OK);
- return PTL_OK;
--}
--
--static int
- fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing)
-fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing)
--{
-- lib_test_peer_t *tp;
-- struct list_head *el;
-- struct list_head *next;
-- unsigned long flags;
-- struct list_head cull;
-- int fail = 0;
--
-- INIT_LIST_HEAD (&cull);
--
- state_lock (nal, &flags);
- LIB_LOCK (nal, flags);
--
- list_for_each_safe (el, next, &nal->ni.ni_test_peers) {
- list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
-- tp = list_entry (el, lib_test_peer_t, tp_list);
--
-- if (tp->tp_threshold == 0) {
-- /* zombie entry */
-- if (outgoing) {
-- /* only cull zombies on outgoing tests,
-- * since we may be at interrupt priority on
-- * incoming messages. */
-- list_del (&tp->tp_list);
-- list_add (&tp->tp_list, &cull);
-- }
-- continue;
-- }
--
-- if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */
-- nid == tp->tp_nid) { /* fail this peer */
-- fail = 1;
--
-- if (tp->tp_threshold != PTL_MD_THRESH_INF) {
-- tp->tp_threshold--;
-- if (outgoing &&
-- tp->tp_threshold == 0) {
-- /* see above */
-- list_del (&tp->tp_list);
-- list_add (&tp->tp_list, &cull);
-- }
-- }
-- break;
-- }
-- }
--
- state_unlock (nal, &flags);
- LIB_UNLOCK (nal, flags);
--
-- while (!list_empty (&cull)) {
-- tp = list_entry (cull.next, lib_test_peer_t, tp_list);
-- list_del (&tp->tp_list);
--
- nal->cb_free (nal, tp, sizeof (*tp));
- PORTAL_FREE(tp, sizeof (*tp));
-- }
--
-- return (fail);
--}
--
--ptl_size_t
--lib_iov_nob (int niov, struct iovec *iov)
--{
-- ptl_size_t nob = 0;
--
-- while (niov-- > 0)
-- nob += (iov++)->iov_len;
--
-- return (nob);
--}
--
--void
--lib_copy_iov2buf (char *dest, int niov, struct iovec *iov,
-- ptl_size_t offset, ptl_size_t len)
--{
-- ptl_size_t nob;
--
-- if (len == 0)
-- return;
--
-- /* skip complete frags before 'offset' */
-- LASSERT (niov > 0);
-- while (offset >= iov->iov_len) {
-- offset -= iov->iov_len;
-- iov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
--
-- do {
-- LASSERT (niov > 0);
-- nob = MIN (iov->iov_len - offset, len);
-- memcpy (dest, iov->iov_base + offset, nob);
--
-- len -= nob;
-- dest += nob;
-- niov--;
-- iov++;
-- offset = 0;
-- } while (len > 0);
--}
--
--void
--lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset,
-- char *src, ptl_size_t len)
--{
-- ptl_size_t nob;
--
-- if (len == 0)
-- return;
--
-- /* skip complete frags before 'offset' */
-- LASSERT (niov > 0);
-- while (offset >= iov->iov_len) {
-- offset -= iov->iov_len;
-- iov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
--
-- do {
-- LASSERT (niov > 0);
-- nob = MIN (iov->iov_len - offset, len);
-- memcpy (iov->iov_base + offset, src, nob);
--
-- len -= nob;
-- src += nob;
-- niov--;
-- iov++;
-- offset = 0;
-- } while (len > 0);
--}
--
--int
--lib_extract_iov (int dst_niov, struct iovec *dst,
-- int src_niov, struct iovec *src,
-- ptl_size_t offset, ptl_size_t len)
--{
-- /* Initialise 'dst' to the subset of 'src' starting at 'offset',
-- * for exactly 'len' bytes, and return the number of entries.
-- * NB not destructive to 'src' */
-- ptl_size_t frag_len;
-- int niov;
--
-- if (len == 0) /* no data => */
-- return (0); /* no frags */
--
-- LASSERT (src_niov > 0);
-- while (offset >= src->iov_len) { /* skip initial frags */
-- offset -= src->iov_len;
-- src_niov--;
-- src++;
-- LASSERT (src_niov > 0);
-- }
--
-- niov = 1;
-- for (;;) {
-- LASSERT (src_niov > 0);
-- LASSERT (niov <= dst_niov);
--
-- frag_len = src->iov_len - offset;
-- dst->iov_base = ((char *)src->iov_base) + offset;
--
-- if (len <= frag_len) {
-- dst->iov_len = len;
-- return (niov);
-- }
--
-- dst->iov_len = frag_len;
--
-- len -= frag_len;
-- dst++;
-- src++;
-- niov++;
-- src_niov--;
-- offset = 0;
-- }
--}
--
--#ifndef __KERNEL__
--ptl_size_t
--lib_kiov_nob (int niov, ptl_kiov_t *kiov)
--{
-- LASSERT (0);
-- return (0);
--}
--
--void
--lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
-- ptl_size_t offset, ptl_size_t len)
--{
-- LASSERT (0);
--}
--
--void
--lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-- char *src, ptl_size_t len)
--{
-- LASSERT (0);
--}
--
--int
--lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
-- int src_niov, ptl_kiov_t *src,
-- ptl_size_t offset, ptl_size_t len)
--{
-- LASSERT (0);
--}
--
--#else
--
--ptl_size_t
--lib_kiov_nob (int niov, ptl_kiov_t *kiov)
--{
-- ptl_size_t nob = 0;
--
-- while (niov-- > 0)
-- nob += (kiov++)->kiov_len;
--
-- return (nob);
--}
--
--void
--lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
-- ptl_size_t offset, ptl_size_t len)
--{
-- ptl_size_t nob;
-- char *addr;
--
-- if (len == 0)
-- return;
--
-- LASSERT (!in_interrupt ());
--
-- LASSERT (niov > 0);
-- while (offset > kiov->kiov_len) {
-- offset -= kiov->kiov_len;
-- kiov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
--
-- do{
-- LASSERT (niov > 0);
-- nob = MIN (kiov->kiov_len - offset, len);
--
-- addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-- memcpy (dest, addr, nob);
-- kunmap (kiov->kiov_page);
--
-- len -= nob;
-- dest += nob;
-- niov--;
-- kiov++;
-- offset = 0;
-- } while (len > 0);
--}
--
--void
--lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-- char *src, ptl_size_t len)
--{
-- ptl_size_t nob;
-- char *addr;
--
-- if (len == 0)
-- return;
--
-- LASSERT (!in_interrupt ());
--
-- LASSERT (niov > 0);
-- while (offset >= kiov->kiov_len) {
-- offset -= kiov->kiov_len;
-- kiov++;
-- niov--;
-- LASSERT (niov > 0);
-- }
--
-- do {
-- LASSERT (niov > 0);
-- nob = MIN (kiov->kiov_len - offset, len);
--
-- addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-- memcpy (addr, src, nob);
-- kunmap (kiov->kiov_page);
--
-- len -= nob;
-- src += nob;
-- niov--;
-- kiov++;
-- offset = 0;
-- } while (len > 0);
--}
--
--int
--lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
-- int src_niov, ptl_kiov_t *src,
-- ptl_size_t offset, ptl_size_t len)
--{
-- /* Initialise 'dst' to the subset of 'src' starting at 'offset',
-- * for exactly 'len' bytes, and return the number of entries.
-- * NB not destructive to 'src' */
-- ptl_size_t frag_len;
-- int niov;
--
-- if (len == 0) /* no data => */
-- return (0); /* no frags */
--
-- LASSERT (src_niov > 0);
-- while (offset >= src->kiov_len) { /* skip initial frags */
-- offset -= src->kiov_len;
-- src_niov--;
-- src++;
-- LASSERT (src_niov > 0);
-- }
--
-- niov = 1;
-- for (;;) {
-- LASSERT (src_niov > 0);
-- LASSERT (niov <= dst_niov);
--
-- frag_len = src->kiov_len - offset;
-- dst->kiov_page = src->kiov_page;
-- dst->kiov_offset = src->kiov_offset + offset;
--
-- if (len <= frag_len) {
-- dst->kiov_len = len;
-- LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
-- return (niov);
-- }
--
-- dst->kiov_len = frag_len;
-- LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
--
-- len -= frag_len;
-- dst++;
-- src++;
-- niov++;
-- src_niov--;
-- offset = 0;
-- }
--}
--#endif
--
--ptl_err_t
- lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-- ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
--{
-- if (mlen == 0)
- return (nal->cb_recv(nal, private, msg,
- 0, NULL,
- offset, mlen, rlen));
- return (nal->libnal_recv(nal, private, msg,
- 0, NULL,
- offset, mlen, rlen));
--
-- if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->cb_recv(nal, private, msg,
- md->md_niov, md->md_iov.iov,
- offset, mlen, rlen));
- return (nal->libnal_recv(nal, private, msg,
- md->md_niov, md->md_iov.iov,
- offset, mlen, rlen));
--
- return (nal->cb_recv_pages(nal, private, msg,
- md->md_niov, md->md_iov.kiov,
- offset, mlen, rlen));
- return (nal->libnal_recv_pages(nal, private, msg,
- md->md_niov, md->md_iov.kiov,
- offset, mlen, rlen));
--}
--
--ptl_err_t
- lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg,
-lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
-- ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-- lib_md_t *md, ptl_size_t offset, ptl_size_t len)
--{
-- if (len == 0)
- return (nal->cb_send(nal, private, msg,
- hdr, type, nid, pid,
- 0, NULL,
- offset, len));
- return (nal->libnal_send(nal, private, msg,
- hdr, type, nid, pid,
- 0, NULL,
- offset, len));
--
-- if ((md->options & PTL_MD_KIOV) == 0)
- return (nal->cb_send(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.iov,
- offset, len));
- return (nal->libnal_send(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.iov,
- offset, len));
--
- return (nal->cb_send_pages(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.kiov,
- offset, len));
- return (nal->libnal_send_pages(nal, private, msg,
- hdr, type, nid, pid,
- md->md_niov, md->md_iov.kiov,
- offset, len));
--}
--
--static void
- lib_commit_md (nal_cb_t *nal, lib_md_t *md, lib_msg_t *msg)
-lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg)
--{
- /* ALWAYS called holding the state_lock */
- lib_counters_t *counters = &nal->ni.counters;
- /* ALWAYS called holding the LIB_LOCK */
- lib_counters_t *counters = &nal->libnal_ni.ni_counters;
--
-- /* Here, we commit the MD to a network OP by marking it busy and
-- * decrementing its threshold. Come what may, the network "owns"
-- * the MD until a call to lib_finalize() signals completion. */
-- msg->md = md;
--
-- md->pending++;
-- if (md->threshold != PTL_MD_THRESH_INF) {
-- LASSERT (md->threshold > 0);
-- md->threshold--;
-- }
--
-- counters->msgs_alloc++;
-- if (counters->msgs_alloc > counters->msgs_max)
-- counters->msgs_max = counters->msgs_alloc;
--
- list_add (&msg->msg_list, &nal->ni.ni_active_msgs);
- list_add (&msg->msg_list, &nal->libnal_ni.ni_active_msgs);
--}
--
--static void
- lib_drop_message (nal_cb_t *nal, void *private, ptl_hdr_t *hdr)
-lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr)
--{
-- unsigned long flags;
--
-- /* CAVEAT EMPTOR: this only drops messages that we've not committed
-- * to receive (init_msg() not called) and therefore can't cause an
-- * event. */
--
- state_lock(nal, &flags);
- nal->ni.counters.drop_count++;
- nal->ni.counters.drop_length += hdr->payload_length;
- state_unlock(nal, &flags);
- LIB_LOCK(nal, flags);
- nal->libnal_ni.ni_counters.drop_count++;
- nal->libnal_ni.ni_counters.drop_length += hdr->payload_length;
- LIB_UNLOCK(nal, flags);
--
-- /* NULL msg => if NAL calls lib_finalize it will be a noop */
-- (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
--}
--
--/*
-- * Incoming messages have a ptl_msg_t object associated with them
-- * by the library. This object encapsulates the state of the
-- * message and allows the NAL to do non-blocking receives or sends
-- * of long messages.
-- *
-- */
--static ptl_err_t
- parse_put(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- ptl_size_t mlength = 0;
-- ptl_size_t offset = 0;
- int unlink = 0;
-- ptl_err_t rc;
- lib_me_t *me;
-- lib_md_t *md;
-- unsigned long flags;
--
-- /* Convert put fields to host byte order */
- hdr->msg.put.match_bits = NTOH__u64 (hdr->msg.put.match_bits);
- hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index);
- hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset);
- hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits);
- hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index);
- hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
- hdr->src_nid, hdr->src_pid,
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.match_bits,
- &mlength, &offset, &unlink);
- if (me == NULL) {
- state_unlock(nal, &flags);
- md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
- hdr->src_nid, hdr->src_pid,
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.match_bits, msg,
- &mlength, &offset);
- if (md == NULL) {
- LIB_UNLOCK(nal, flags);
-- return (PTL_FAIL);
-- }
-
- md = me->md;
- CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d "
- "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index,
- hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lib_commit_md(nal, md, msg);
--
- msg->ev.type = PTL_EVENT_PUT;
- msg->ev.initiator.nid = hdr->src_nid;
- msg->ev.initiator.pid = hdr->src_pid;
- msg->ev.portal = hdr->msg.put.ptl_index;
- msg->ev.match_bits = hdr->msg.put.match_bits;
- msg->ev.rlength = hdr->payload_length;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
- msg->ev.type = PTL_EVENT_PUT_END;
-- msg->ev.hdr_data = hdr->msg.put.hdr_data;
-
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
--
-- if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
-- !(md->options & PTL_MD_ACK_DISABLE)) {
-- msg->ack_wmd = hdr->msg.put.ack_wmd;
-- }
-
- ni->counters.recv_count++;
- ni->counters.recv_length += mlength;
--
- /* only unlink after MD's pending count has been bumped in
- * lib_commit_md() otherwise lib_me_unlink() will nuke it */
- if (unlink)
- lib_me_unlink (nal, me);
- ni->ni_counters.recv_count++;
- ni->ni_counters.recv_length += mlength;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
-- rc = lib_recv(nal, private, msg, md, offset, mlength,
-- hdr->payload_length);
-- if (rc != PTL_OK)
-- CERROR(LPU64": error on receiving PUT from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
- ni->ni_pid.nid, hdr->src_nid, rc);
--
-- return (rc);
--}
--
--static ptl_err_t
- parse_get(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- ptl_size_t mlength = 0;
-- ptl_size_t offset = 0;
- int unlink = 0;
- lib_me_t *me;
-- lib_md_t *md;
-- ptl_hdr_t reply;
-- unsigned long flags;
-- int rc;
--
-- /* Convert get fields to host byte order */
- hdr->msg.get.match_bits = NTOH__u64 (hdr->msg.get.match_bits);
- hdr->msg.get.ptl_index = NTOH__u32 (hdr->msg.get.ptl_index);
- hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length);
- hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset);
- hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits);
- hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index);
- hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
- hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
- hdr->src_nid, hdr->src_pid,
- hdr->msg.get.sink_length, hdr->msg.get.src_offset,
- hdr->msg.get.match_bits,
- &mlength, &offset, &unlink);
- if (me == NULL) {
- state_unlock(nal, &flags);
- md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
- hdr->src_nid, hdr->src_pid,
- hdr->msg.get.sink_length, hdr->msg.get.src_offset,
- hdr->msg.get.match_bits, msg,
- &mlength, &offset);
- if (md == NULL) {
- LIB_UNLOCK(nal, flags);
-- return (PTL_FAIL);
-- }
-
- md = me->md;
- CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d "
- "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index,
- hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length,
- md->md_lh.lh_cookie, md->md_niov, offset);
-
- lib_commit_md(nal, md, msg);
--
- msg->ev.type = PTL_EVENT_GET;
- msg->ev.initiator.nid = hdr->src_nid;
- msg->ev.initiator.pid = hdr->src_pid;
- msg->ev.portal = hdr->msg.get.ptl_index;
- msg->ev.match_bits = hdr->msg.get.match_bits;
- msg->ev.rlength = hdr->payload_length;
- msg->ev.mlength = mlength;
- msg->ev.offset = offset;
- msg->ev.type = PTL_EVENT_GET_END;
-- msg->ev.hdr_data = 0;
-
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
-
- ni->counters.send_count++;
- ni->counters.send_length += mlength;
--
- /* only unlink after MD's refcount has been bumped in
- * lib_commit_md() otherwise lib_me_unlink() will nuke it */
- if (unlink)
- lib_me_unlink (nal, me);
- ni->ni_counters.send_count++;
- ni->ni_counters.send_length += mlength;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
-- memset (&reply, 0, sizeof (reply));
- reply.type = HTON__u32 (PTL_MSG_REPLY);
- reply.dest_nid = HTON__u64 (hdr->src_nid);
- reply.src_nid = HTON__u64 (ni->nid);
- reply.dest_pid = HTON__u32 (hdr->src_pid);
- reply.src_pid = HTON__u32 (ni->pid);
- reply.payload_length = HTON__u32 (mlength);
- reply.type = cpu_to_le32(PTL_MSG_REPLY);
- reply.dest_nid = cpu_to_le64(hdr->src_nid);
- reply.dest_pid = cpu_to_le32(hdr->src_pid);
- reply.src_nid = cpu_to_le64(ni->ni_pid.nid);
- reply.src_pid = cpu_to_le32(ni->ni_pid.pid);
- reply.payload_length = cpu_to_le32(mlength);
--
-- reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
--
-- /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming
-- * message. Some NALs _require_ this to implement optimized GET */
--
-- rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY,
-- hdr->src_nid, hdr->src_pid, md, offset, mlength);
-- if (rc != PTL_OK)
-- CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
- ni->ni_pid.nid, hdr->src_nid, rc);
--
-- /* Discard any junk after the hdr */
-- (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
--
-- return (rc);
--}
--
--static ptl_err_t
- parse_reply(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- lib_md_t *md;
-- int rlength;
-- int length;
-- unsigned long flags;
-- ptl_err_t rc;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
-- /* NB handles only looked up by creator (no flips) */
-- md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
-- if (md == NULL || md->threshold == 0) {
-- CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
- ni->nid, hdr->src_nid,
- ni->ni_pid.nid, hdr->src_nid,
-- md == NULL ? "invalid" : "inactive",
-- hdr->msg.reply.dst_wmd.wh_interface_cookie,
-- hdr->msg.reply.dst_wmd.wh_object_cookie);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
-- return (PTL_FAIL);
-- }
--
-- LASSERT (md->offset == 0);
--
-- length = rlength = hdr->payload_length;
--
-- if (length > md->length) {
-- if ((md->options & PTL_MD_TRUNCATE) == 0) {
-- CERROR (LPU64": Dropping REPLY from "LPU64
-- " length %d for MD "LPX64" would overflow (%d)\n",
- ni->nid, hdr->src_nid, length,
- ni->ni_pid.nid, hdr->src_nid, length,
-- hdr->msg.reply.dst_wmd.wh_object_cookie,
-- md->length);
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
-- return (PTL_FAIL);
-- }
-- length = md->length;
-- }
--
-- CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n",
-- hdr->src_nid, length, rlength,
-- hdr->msg.reply.dst_wmd.wh_object_cookie);
--
-- lib_commit_md(nal, md, msg);
--
- msg->ev.type = PTL_EVENT_REPLY;
- msg->ev.type = PTL_EVENT_REPLY_END;
-- msg->ev.initiator.nid = hdr->src_nid;
-- msg->ev.initiator.pid = hdr->src_pid;
-- msg->ev.rlength = rlength;
-- msg->ev.mlength = length;
-- msg->ev.offset = 0;
--
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- lib_md_deconstruct(nal, md, &msg->ev.md);
- ptl_md2handle(&msg->ev.md_handle, nal, md);
--
- ni->counters.recv_count++;
- ni->counters.recv_length += length;
- ni->ni_counters.recv_count++;
- ni->ni_counters.recv_length += length;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
-- rc = lib_recv(nal, private, msg, md, 0, length, rlength);
-- if (rc != PTL_OK)
-- CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n",
- ni->nid, hdr->src_nid, rc);
- ni->ni_pid.nid, hdr->src_nid, rc);
--
-- return (rc);
--}
--
--static ptl_err_t
- parse_ack(nal_cb_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
--{
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- lib_md_t *md;
-- unsigned long flags;
--
-- /* Convert ack fields to host byte order */
- hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits);
- hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength);
- hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits);
- hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength);
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
-- /* NB handles only looked up by creator (no flips) */
-- md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
-- if (md == NULL || md->threshold == 0) {
-- CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
- LPX64"."LPX64"\n", ni->nid, hdr->src_nid,
- LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid,
-- (md == NULL) ? "invalid" : "inactive",
-- hdr->msg.ack.dst_wmd.wh_interface_cookie,
-- hdr->msg.ack.dst_wmd.wh_object_cookie);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
-- return (PTL_FAIL);
-- }
--
-- CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
- ni->nid, hdr->src_nid,
- ni->ni_pid.nid, hdr->src_nid,
-- hdr->msg.ack.dst_wmd.wh_object_cookie);
--
-- lib_commit_md(nal, md, msg);
--
-- msg->ev.type = PTL_EVENT_ACK;
-- msg->ev.initiator.nid = hdr->src_nid;
-- msg->ev.initiator.pid = hdr->src_pid;
-- msg->ev.mlength = hdr->msg.ack.mlength;
-- msg->ev.match_bits = hdr->msg.ack.match_bits;
--
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- lib_md_deconstruct(nal, md, &msg->ev.md);
- ptl_md2handle(&msg->ev.md_handle, nal, md);
--
- ni->counters.recv_count++;
- ni->ni_counters.recv_count++;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
-- /* We have received and matched up the ack OK, create the
-- * completion event now... */
-- lib_finalize(nal, private, msg, PTL_OK);
--
-- /* ...and now discard any junk after the hdr */
-- (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
--
-- return (PTL_OK);
--}
--
--static char *
--hdr_type_string (ptl_hdr_t *hdr)
--{
-- switch (hdr->type) {
-- case PTL_MSG_ACK:
-- return ("ACK");
-- case PTL_MSG_PUT:
-- return ("PUT");
-- case PTL_MSG_GET:
-- return ("GET");
-- case PTL_MSG_REPLY:
-- return ("REPLY");
-- case PTL_MSG_HELLO:
-- return ("HELLO");
-- default:
-- return ("<UNKNOWN>");
-- }
--}
--
- void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr)
-void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr)
--{
-- char *type_str = hdr_type_string (hdr);
--
- nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str);
- nal->cb_printf(nal, " From nid/pid %Lu/%Lu", hdr->src_nid,
- hdr->src_pid);
- nal->cb_printf(nal, " To nid/pid %Lu/%Lu\n", hdr->dest_nid,
- hdr->dest_pid);
- CWARN("P3 Header at %p of type %s\n", hdr, type_str);
- CWARN(" From nid/pid "LPX64"/%u", hdr->src_nid, hdr->src_pid);
- CWARN(" To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid);
--
-- switch (hdr->type) {
-- default:
-- break;
--
-- case PTL_MSG_PUT:
- nal->cb_printf(nal,
- " Ptl index %d, ack md "LPX64"."LPX64", "
- "match bits "LPX64"\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- nal->cb_printf(nal,
- " Length %d, offset %d, hdr data "LPX64"\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
- CWARN(" Ptl index %d, ack md "LPX64"."LPX64", "
- "match bits "LPX64"\n",
- hdr->msg.put.ptl_index,
- hdr->msg.put.ack_wmd.wh_interface_cookie,
- hdr->msg.put.ack_wmd.wh_object_cookie,
- hdr->msg.put.match_bits);
- CWARN(" Length %d, offset %d, hdr data "LPX64"\n",
- hdr->payload_length, hdr->msg.put.offset,
- hdr->msg.put.hdr_data);
-- break;
--
-- case PTL_MSG_GET:
- nal->cb_printf(nal,
- " Ptl index %d, return md "LPX64"."LPX64", "
- "match bits "LPX64"\n", hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- nal->cb_printf(nal,
- " Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
- CWARN(" Ptl index %d, return md "LPX64"."LPX64", "
- "match bits "LPX64"\n", hdr->msg.get.ptl_index,
- hdr->msg.get.return_wmd.wh_interface_cookie,
- hdr->msg.get.return_wmd.wh_object_cookie,
- hdr->msg.get.match_bits);
- CWARN(" Length %d, src offset %d\n",
- hdr->msg.get.sink_length,
- hdr->msg.get.src_offset);
-- break;
--
-- case PTL_MSG_ACK:
- nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
- "manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
- CWARN(" dst md "LPX64"."LPX64", "
- "manipulated length %d\n",
- hdr->msg.ack.dst_wmd.wh_interface_cookie,
- hdr->msg.ack.dst_wmd.wh_object_cookie,
- hdr->msg.ack.mlength);
-- break;
--
-- case PTL_MSG_REPLY:
- nal->cb_printf(nal, " dst md "LPX64"."LPX64", "
- "length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
- CWARN(" dst md "LPX64"."LPX64", "
- "length %d\n",
- hdr->msg.reply.dst_wmd.wh_interface_cookie,
- hdr->msg.reply.dst_wmd.wh_object_cookie,
- hdr->payload_length);
-- }
--
--} /* end of print_hdr() */
--
--
- void
- lib_parse(nal_cb_t *nal, ptl_hdr_t *hdr, void *private)
-ptl_err_t
-lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private)
--{
-- unsigned long flags;
-- ptl_err_t rc;
-- lib_msg_t *msg;
-
- /* NB we return PTL_OK if we manage to parse the header and believe
- * it looks OK. Anything that goes wrong with receiving the
- * message after that point is the responsibility of the NAL */
--
-- /* convert common fields to host byte order */
- hdr->dest_nid = NTOH__u64 (hdr->dest_nid);
- hdr->src_nid = NTOH__u64 (hdr->src_nid);
- hdr->dest_pid = NTOH__u32 (hdr->dest_pid);
- hdr->src_pid = NTOH__u32 (hdr->src_pid);
- hdr->type = NTOH__u32 (hdr->type);
- hdr->payload_length = NTOH__u32(hdr->payload_length);
- #if 0
- nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n",
- nal->ni.nid, nal, hdr, hdr->type);
- print_hdr(nal, hdr);
- #endif
- if (hdr->type == PTL_MSG_HELLO) {
- hdr->type = le32_to_cpu(hdr->type);
- hdr->src_nid = le64_to_cpu(hdr->src_nid);
- hdr->src_pid = le32_to_cpu(hdr->src_pid);
- hdr->dest_pid = le32_to_cpu(hdr->dest_pid);
- hdr->payload_length = le32_to_cpu(hdr->payload_length);
-
- switch (hdr->type) {
- case PTL_MSG_HELLO: {
-- /* dest_nid is really ptl_magicversion_t */
-- ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
--
- CERROR (LPU64": Dropping unexpected HELLO message: "
- mv->magic = le32_to_cpu(mv->magic);
- mv->version_major = le16_to_cpu(mv->version_major);
- mv->version_minor = le16_to_cpu(mv->version_minor);
-
- if (mv->magic == PORTALS_PROTO_MAGIC &&
- mv->version_major == PORTALS_PROTO_VERSION_MAJOR &&
- mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
- CWARN (LPU64": Dropping unexpected HELLO message: "
- "magic %d, version %d.%d from "LPD64"\n",
- nal->libnal_ni.ni_pid.nid, mv->magic,
- mv->version_major, mv->version_minor,
- hdr->src_nid);
-
- /* it's good but we don't want it */
- lib_drop_message(nal, private, hdr);
- return PTL_OK;
- }
-
- /* we got garbage */
- CERROR (LPU64": Bad HELLO message: "
-- "magic %d, version %d.%d from "LPD64"\n",
- nal->ni.nid, mv->magic,
- nal->libnal_ni.ni_pid.nid, mv->magic,
-- mv->version_major, mv->version_minor,
-- hdr->src_nid);
- lib_drop_message(nal, private, hdr);
- return;
- return PTL_FAIL;
-- }
-
- if (hdr->dest_nid != nal->ni.nid) {
- CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64
- " (not me)\n", nal->ni.nid, hdr_type_string (hdr),
- hdr->src_nid, hdr->dest_nid);
- lib_drop_message(nal, private, hdr);
- return;
-
- case PTL_MSG_ACK:
- case PTL_MSG_PUT:
- case PTL_MSG_GET:
- case PTL_MSG_REPLY:
- hdr->dest_nid = le64_to_cpu(hdr->dest_nid);
- if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
- CERROR(LPU64": BAD dest NID in %s message from"
- LPU64" to "LPU64" (not me)\n",
- nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
- hdr->src_nid, hdr->dest_nid);
- return PTL_FAIL;
- }
- break;
-
- default:
- CERROR(LPU64": Bad message type 0x%x from "LPU64"\n",
- nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid);
- return PTL_FAIL;
-- }
--
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
- /* We've decided we're not receiving garbage since we can parse the
- * header. We will return PTL_OK come what may... */
-
- if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */
-- fail_peer (nal, hdr->src_nid, 0)) /* shall we now? */
-- {
-- CERROR(LPU64": Dropping incoming %s from "LPU64
-- ": simulated failure\n",
- nal->ni.nid, hdr_type_string (hdr),
- nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
-- hdr->src_nid);
-- lib_drop_message(nal, private, hdr);
- return;
- return PTL_OK;
-- }
--
-- msg = lib_msg_alloc(nal);
-- if (msg == NULL) {
-- CERROR(LPU64": Dropping incoming %s from "LPU64
-- ": can't allocate a lib_msg_t\n",
- nal->ni.nid, hdr_type_string (hdr),
- nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
-- hdr->src_nid);
-- lib_drop_message(nal, private, hdr);
- return;
- return PTL_OK;
-- }
-
- do_gettimeofday(&msg->ev.arrival_time);
--
-- switch (hdr->type) {
-- case PTL_MSG_ACK:
-- rc = parse_ack(nal, hdr, private, msg);
-- break;
-- case PTL_MSG_PUT:
-- rc = parse_put(nal, hdr, private, msg);
-- break;
-- case PTL_MSG_GET:
-- rc = parse_get(nal, hdr, private, msg);
-- break;
-- case PTL_MSG_REPLY:
-- rc = parse_reply(nal, hdr, private, msg);
-- break;
-- default:
- CERROR(LPU64": Dropping <unknown> message from "LPU64
- ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid,
- hdr->type);
- rc = PTL_FAIL;
- LASSERT(0);
- rc = PTL_FAIL; /* no compiler warning please */
-- break;
-- }
--
-- if (rc != PTL_OK) {
-- if (msg->md != NULL) {
-- /* committed... */
-- lib_finalize(nal, private, msg, rc);
-- } else {
- state_lock(nal, &flags);
- lib_msg_free(nal, msg); /* expects state_lock held */
- state_unlock(nal, &flags);
- LIB_LOCK(nal, flags);
- lib_msg_free(nal, msg); /* expects LIB_LOCK held */
- LIB_UNLOCK(nal, flags);
--
-- lib_drop_message(nal, private, hdr);
-- }
-- }
-
- return PTL_OK;
- /* That's "OK I can parse it", not "OK I like it" :) */
--}
--
--int
- do_PtlPut(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
-lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh,
- ptl_ack_req_t ack, ptl_process_id_t *id,
- ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits,
- ptl_size_t offset, ptl_hdr_data_t hdr_data)
--{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_ack_req_t ack_req_in
- * ptl_process_id_t target_in
- * ptl_pt_index_t portal_in
- * ptl_ac_index_t cookie_in
- * ptl_match_bits_t match_bits_in
- * ptl_size_t offset_in
- *
- * Outgoing:
- */
-
- PtlPut_in *args = v_args;
- ptl_process_id_t *id = &args->target_in;
- PtlPut_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_nal_t *nal = apinal->nal_data;
- lib_ni_t *ni = &nal->libnal_ni;
-- lib_msg_t *msg;
-- ptl_hdr_t hdr;
-- lib_md_t *md;
-- unsigned long flags;
-- int rc;
--
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
- if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
-- fail_peer (nal, id->nid, 1)) /* shall we now? */
-- {
- CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
- nal->ni.nid, id->nid);
- return (ret->rc = PTL_INV_PROC);
- CERROR("Dropping PUT to "LPU64": simulated failure\n",
- id->nid);
- return PTL_PROCESS_INVALID;
-- }
--
-- msg = lib_msg_alloc(nal);
-- if (msg == NULL) {
-- CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
- ni->nid, id->nid);
- return (ret->rc = PTL_NOSPACE);
- ni->ni_pid.nid, id->nid);
- return PTL_NO_SPACE;
-- }
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- md = ptl_handle2md(&args->md_in, nal);
- md = ptl_handle2md(mdh, nal);
-- if (md == NULL || md->threshold == 0) {
-- lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- return (ret->rc = PTL_INV_MD);
- return PTL_MD_INVALID;
-- }
--
- CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid,
- (unsigned long)id->pid);
- CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid);
--
-- memset (&hdr, 0, sizeof (hdr));
- hdr.type = HTON__u32 (PTL_MSG_PUT);
- hdr.dest_nid = HTON__u64 (id->nid);
- hdr.src_nid = HTON__u64 (ni->nid);
- hdr.dest_pid = HTON__u32 (id->pid);
- hdr.src_pid = HTON__u32 (ni->pid);
- hdr.payload_length = HTON__u32 (md->length);
- hdr.type = cpu_to_le32(PTL_MSG_PUT);
- hdr.dest_nid = cpu_to_le64(id->nid);
- hdr.dest_pid = cpu_to_le32(id->pid);
- hdr.src_nid = cpu_to_le64(ni->ni_pid.nid);
- hdr.src_pid = cpu_to_le32(ni->ni_pid.pid);
- hdr.payload_length = cpu_to_le32(md->length);
--
-- /* NB handles only looked up by creator (no flips) */
- if (args->ack_req_in == PTL_ACK_REQ) {
- if (ack == PTL_ACK_REQ) {
-- hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
-- hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
-- } else {
-- hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
-- }
--
- hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in);
- hdr.msg.put.ptl_index = HTON__u32 (args->portal_in);
- hdr.msg.put.offset = HTON__u32 (args->offset_in);
- hdr.msg.put.hdr_data = args->hdr_data_in;
- hdr.msg.put.match_bits = cpu_to_le64(match_bits);
- hdr.msg.put.ptl_index = cpu_to_le32(portal);
- hdr.msg.put.offset = cpu_to_le32(offset);
- hdr.msg.put.hdr_data = hdr_data;
--
-- lib_commit_md(nal, md, msg);
--
- msg->ev.type = PTL_EVENT_SENT;
- msg->ev.initiator.nid = ni->nid;
- msg->ev.initiator.pid = ni->pid;
- msg->ev.portal = args->portal_in;
- msg->ev.match_bits = args->match_bits_in;
- msg->ev.type = PTL_EVENT_SEND_END;
- msg->ev.initiator.nid = ni->ni_pid.nid;
- msg->ev.initiator.pid = ni->ni_pid.pid;
- msg->ev.pt_index = portal;
- msg->ev.match_bits = match_bits;
-- msg->ev.rlength = md->length;
-- msg->ev.mlength = md->length;
- msg->ev.offset = args->offset_in;
- msg->ev.hdr_data = args->hdr_data_in;
- msg->ev.offset = offset;
- msg->ev.hdr_data = hdr_data;
--
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- lib_md_deconstruct(nal, md, &msg->ev.md);
- ptl_md2handle(&msg->ev.md_handle, nal, md);
--
- ni->counters.send_count++;
- ni->counters.send_length += md->length;
- ni->ni_counters.send_count++;
- ni->ni_counters.send_length += md->length;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- rc = lib_send (nal, private, msg, &hdr, PTL_MSG_PUT,
- rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
-- id->nid, id->pid, md, 0, md->length);
-- if (rc != PTL_OK) {
- CERROR(LPU64": error sending PUT to "LPU64": %d\n",
- ni->nid, id->nid, rc);
- lib_finalize (nal, private, msg, rc);
- CERROR("Error sending PUT to "LPX64": %d\n",
- id->nid, rc);
- lib_finalize (nal, NULL, msg, rc);
-- }
--
-- /* completion will be signalled by an event */
- return ret->rc = PTL_OK;
- return PTL_OK;
--}
--
--lib_msg_t *
- lib_fake_reply_msg (nal_cb_t *nal, ptl_nid_t peer_nid, lib_md_t *getmd)
-lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
--{
-- /* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This
- * returns a msg the NAL can pass to lib_finalize() so that a REPLY
- * event still occurs.
- * returns a msg for the NAL to pass to lib_finalize() when the sink
- * data has been received.
-- *
- * CAVEAT EMPTOR: 'getmd' is passed by pointer so it MUST be valid.
- * This can only be guaranteed while a lib_msg_t holds a reference
- * on it (ie. pending > 0), so best call this before the
- * lib_finalize() of the original GET. */
- * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
- * lib_finalize() is called on it, so the NAL must call this first */
--
- lib_ni_t *ni = &nal->ni;
- lib_ni_t *ni = &nal->libnal_ni;
-- lib_msg_t *msg = lib_msg_alloc(nal);
- lib_md_t *getmd = getmsg->md;
-- unsigned long flags;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
-- LASSERT (getmd->pending > 0);
--
-- if (msg == NULL) {
-- CERROR ("Dropping REPLY from "LPU64": can't allocate msg\n",
-- peer_nid);
-- goto drop;
-- }
--
-- if (getmd->threshold == 0) {
-- CERROR ("Dropping REPLY from "LPU64" for inactive MD %p\n",
-- peer_nid, getmd);
-- goto drop_msg;
-- }
--
-- LASSERT (getmd->offset == 0);
--
-- CDEBUG(D_NET, "Reply from "LPU64" md %p\n", peer_nid, getmd);
--
-- lib_commit_md (nal, getmd, msg);
--
- msg->ev.type = PTL_EVENT_REPLY;
- msg->ev.type = PTL_EVENT_REPLY_END;
-- msg->ev.initiator.nid = peer_nid;
-- msg->ev.initiator.pid = 0; /* XXX FIXME!!! */
-- msg->ev.rlength = msg->ev.mlength = getmd->length;
-- msg->ev.offset = 0;
--
- lib_md_deconstruct(nal, getmd, &msg->ev.mem_desc);
- lib_md_deconstruct(nal, getmd, &msg->ev.md);
- ptl_md2handle(&msg->ev.md_handle, nal, getmd);
--
- ni->counters.recv_count++;
- ni->counters.recv_length += getmd->length;
- ni->ni_counters.recv_count++;
- ni->ni_counters.recv_length += getmd->length;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
-- return msg;
--
-- drop_msg:
-- lib_msg_free(nal, msg);
-- drop:
- nal->ni.counters.drop_count++;
- nal->ni.counters.drop_length += getmd->length;
- nal->libnal_ni.ni_counters.drop_count++;
- nal->libnal_ni.ni_counters.drop_length += getmd->length;
--
- state_unlock (nal, &flags);
- LIB_UNLOCK (nal, flags);
--
-- return NULL;
--}
--
--int
- do_PtlGet(nal_cb_t *nal, void *private, void *v_args, void *v_ret)
-lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
- ptl_pt_index_t portal, ptl_ac_index_t ac,
- ptl_match_bits_t match_bits, ptl_size_t offset)
--{
- /*
- * Incoming:
- * ptl_handle_md_t md_in
- * ptl_process_id_t target_in
- * ptl_pt_index_t portal_in
- * ptl_ac_index_t cookie_in
- * ptl_match_bits_t match_bits_in
- * ptl_size_t offset_in
- *
- * Outgoing:
- */
-
- PtlGet_in *args = v_args;
- ptl_process_id_t *id = &args->target_in;
- PtlGet_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_nal_t *nal = apinal->nal_data;
- lib_ni_t *ni = &nal->libnal_ni;
-- lib_msg_t *msg;
-- ptl_hdr_t hdr;
-- lib_md_t *md;
-- unsigned long flags;
-- int rc;
--
- if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */
- if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
-- fail_peer (nal, id->nid, 1)) /* shall we now? */
-- {
- CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n",
- nal->ni.nid, id->nid);
- return (ret->rc = PTL_INV_PROC);
- CERROR("Dropping PUT to "LPX64": simulated failure\n",
- id->nid);
- return PTL_PROCESS_INVALID;
-- }
--
-- msg = lib_msg_alloc(nal);
-- if (msg == NULL) {
- CERROR(LPU64": Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
- ni->nid, id->nid);
- return (ret->rc = PTL_NOSPACE);
- CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
- id->nid);
- return PTL_NO_SPACE;
-- }
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
- md = ptl_handle2md(&args->md_in, nal);
- md = ptl_handle2md(mdh, nal);
-- if (md == NULL || !md->threshold) {
-- lib_msg_free(nal, msg);
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- return ret->rc = PTL_INV_MD;
- return PTL_MD_INVALID;
-- }
--
-- CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
-- (unsigned long)id->pid);
--
-- memset (&hdr, 0, sizeof (hdr));
- hdr.type = HTON__u32 (PTL_MSG_GET);
- hdr.dest_nid = HTON__u64 (id->nid);
- hdr.src_nid = HTON__u64 (ni->nid);
- hdr.dest_pid = HTON__u32 (id->pid);
- hdr.src_pid = HTON__u32 (ni->pid);
- hdr.type = cpu_to_le32(PTL_MSG_GET);
- hdr.dest_nid = cpu_to_le64(id->nid);
- hdr.dest_pid = cpu_to_le32(id->pid);
- hdr.src_nid = cpu_to_le64(ni->ni_pid.nid);
- hdr.src_pid = cpu_to_le32(ni->ni_pid.pid);
-- hdr.payload_length = 0;
--
-- /* NB handles only looked up by creator (no flips) */
-- hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
-- hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
--
- hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in);
- hdr.msg.get.ptl_index = HTON__u32 (args->portal_in);
- hdr.msg.get.src_offset = HTON__u32 (args->offset_in);
- hdr.msg.get.sink_length = HTON__u32 (md->length);
- hdr.msg.get.match_bits = cpu_to_le64(match_bits);
- hdr.msg.get.ptl_index = cpu_to_le32(portal);
- hdr.msg.get.src_offset = cpu_to_le32(offset);
- hdr.msg.get.sink_length = cpu_to_le32(md->length);
--
-- lib_commit_md(nal, md, msg);
--
- msg->ev.type = PTL_EVENT_SENT;
- msg->ev.initiator.nid = ni->nid;
- msg->ev.initiator.pid = ni->pid;
- msg->ev.portal = args->portal_in;
- msg->ev.match_bits = args->match_bits_in;
- msg->ev.type = PTL_EVENT_SEND_END;
- msg->ev.initiator = ni->ni_pid;
- msg->ev.pt_index = portal;
- msg->ev.match_bits = match_bits;
-- msg->ev.rlength = md->length;
-- msg->ev.mlength = md->length;
- msg->ev.offset = args->offset_in;
- msg->ev.offset = offset;
-- msg->ev.hdr_data = 0;
--
- lib_md_deconstruct(nal, md, &msg->ev.mem_desc);
- lib_md_deconstruct(nal, md, &msg->ev.md);
- ptl_md2handle(&msg->ev.md_handle, nal, md);
--
- ni->counters.send_count++;
- ni->ni_counters.send_count++;
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--
- rc = lib_send (nal, private, msg, &hdr, PTL_MSG_GET,
- rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET,
-- id->nid, id->pid, NULL, 0, 0);
-- if (rc != PTL_OK) {
-- CERROR(LPU64": error sending GET to "LPU64": %d\n",
- ni->nid, id->nid, rc);
- lib_finalize (nal, private, msg, rc);
- ni->ni_pid.nid, id->nid, rc);
- lib_finalize (nal, NULL, msg, rc);
-- }
--
-- /* completion will be signalled by an event */
- return ret->rc = PTL_OK;
- return PTL_OK;
--}
--
--void lib_assert_wire_constants (void)
--{
-- /* Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert.bartonsoftware.com 2.4.20-18.9 #1 Thu May 29 06:54:41 EDT 2003 i68
- * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */
- * running on Linux mdevi 2.4.21-p4smp-55chaos #1 SMP Tue Jun 8 14:38:44 PDT 2004 i686 i686 i
- * with gcc version 3.2.3 20030502 (Red Hat Linux 3.2.3-34) */
--
--
-- /* Constants... */
-- LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded);
- LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0);
- LASSERT (PORTALS_PROTO_VERSION_MINOR == 3);
- LASSERT (PORTALS_PROTO_VERSION_MAJOR == 1);
- LASSERT (PORTALS_PROTO_VERSION_MINOR == 0);
-- LASSERT (PTL_MSG_ACK == 0);
-- LASSERT (PTL_MSG_PUT == 1);
-- LASSERT (PTL_MSG_GET == 2);
-- LASSERT (PTL_MSG_REPLY == 3);
-- LASSERT (PTL_MSG_HELLO == 4);
--
-- /* Checks for struct ptl_handle_wire_t */
-- LASSERT ((int)sizeof(ptl_handle_wire_t) == 16);
- LASSERT (offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0);
- LASSERT ((int)offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0);
-- LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8);
- LASSERT (offsetof(ptl_handle_wire_t, wh_object_cookie) == 8);
- LASSERT ((int)offsetof(ptl_handle_wire_t, wh_object_cookie) == 8);
-- LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8);
--
-- /* Checks for struct ptl_magicversion_t */
-- LASSERT ((int)sizeof(ptl_magicversion_t) == 8);
- LASSERT (offsetof(ptl_magicversion_t, magic) == 0);
- LASSERT ((int)offsetof(ptl_magicversion_t, magic) == 0);
-- LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4);
- LASSERT (offsetof(ptl_magicversion_t, version_major) == 4);
- LASSERT ((int)offsetof(ptl_magicversion_t, version_major) == 4);
-- LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2);
- LASSERT (offsetof(ptl_magicversion_t, version_minor) == 6);
- LASSERT ((int)offsetof(ptl_magicversion_t, version_minor) == 6);
-- LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2);
--
-- /* Checks for struct ptl_hdr_t */
-- LASSERT ((int)sizeof(ptl_hdr_t) == 72);
- LASSERT (offsetof(ptl_hdr_t, dest_nid) == 0);
- LASSERT ((int)offsetof(ptl_hdr_t, dest_nid) == 0);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8);
- LASSERT (offsetof(ptl_hdr_t, src_nid) == 8);
- LASSERT ((int)offsetof(ptl_hdr_t, src_nid) == 8);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8);
- LASSERT (offsetof(ptl_hdr_t, dest_pid) == 16);
- LASSERT ((int)offsetof(ptl_hdr_t, dest_pid) == 16);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4);
- LASSERT (offsetof(ptl_hdr_t, src_pid) == 20);
- LASSERT ((int)offsetof(ptl_hdr_t, src_pid) == 20);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4);
- LASSERT (offsetof(ptl_hdr_t, type) == 24);
- LASSERT ((int)offsetof(ptl_hdr_t, type) == 24);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4);
- LASSERT (offsetof(ptl_hdr_t, payload_length) == 28);
- LASSERT ((int)offsetof(ptl_hdr_t, payload_length) == 28);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4);
- LASSERT (offsetof(ptl_hdr_t, msg) == 32);
- LASSERT ((int)offsetof(ptl_hdr_t, msg) == 32);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg) == 40);
--
-- /* Ack */
- LASSERT (offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16);
- LASSERT (offsetof(ptl_hdr_t, msg.ack.match_bits) == 48);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.match_bits) == 48);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8);
- LASSERT (offsetof(ptl_hdr_t, msg.ack.mlength) == 56);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.mlength) == 56);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4);
--
-- /* Put */
- LASSERT (offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16);
- LASSERT (offsetof(ptl_hdr_t, msg.put.match_bits) == 48);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.put.match_bits) == 48);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8);
- LASSERT (offsetof(ptl_hdr_t, msg.put.hdr_data) == 56);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.put.hdr_data) == 56);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8);
- LASSERT (offsetof(ptl_hdr_t, msg.put.ptl_index) == 64);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ptl_index) == 64);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4);
- LASSERT (offsetof(ptl_hdr_t, msg.put.offset) == 68);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.put.offset) == 68);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4);
--
-- /* Get */
- LASSERT (offsetof(ptl_hdr_t, msg.get.return_wmd) == 32);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.get.return_wmd) == 32);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16);
- LASSERT (offsetof(ptl_hdr_t, msg.get.match_bits) == 48);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.get.match_bits) == 48);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8);
- LASSERT (offsetof(ptl_hdr_t, msg.get.ptl_index) == 56);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.get.ptl_index) == 56);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4);
- LASSERT (offsetof(ptl_hdr_t, msg.get.src_offset) == 60);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.get.src_offset) == 60);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4);
- LASSERT (offsetof(ptl_hdr_t, msg.get.sink_length) == 64);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.get.sink_length) == 64);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4);
--
-- /* Reply */
- LASSERT (offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16);
--
-- /* Hello */
- LASSERT (offsetof(ptl_hdr_t, msg.hello.incarnation) == 32);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.incarnation) == 32);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8);
- LASSERT (offsetof(ptl_hdr_t, msg.hello.type) == 40);
- LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.type) == 40);
-- LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-msg.c
-- * Message decoding, parsing and finalizing routines
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#ifndef __KERNEL__
--# include <stdio.h>
--#else
--# define DEBUG_SUBSYSTEM S_PORTALS
--# include <linux/kp30.h>
--#endif
--
--#include <portals/lib-p30.h>
--
--void
- lib_enq_event_locked (nal_cb_t *nal, void *private,
-lib_enq_event_locked (lib_nal_t *nal, void *private,
-- lib_eq_t *eq, ptl_event_t *ev)
--{
-- ptl_event_t *eq_slot;
- int rc;
-
- ev->sequence = eq->sequence++; /* Allocate the next queue slot */
-
- /* size must be a power of 2 to handle a wrapped sequence # */
- LASSERT (eq->size != 0 &&
- eq->size == LOWEST_BIT_SET (eq->size));
- eq_slot = eq->base + (ev->sequence & (eq->size - 1));
--
- /* Copy the event into the allocated slot, ensuring all the rest of
- * the event's contents have been copied _before_ the sequence
- * number gets updated. A processes 'getting' an event waits on
- * the next queue slot's sequence to be 'new'. When it is, _all_
- * other event fields had better be consistent. I assert
- * 'sequence' is the last member, so I only need a 2 stage copy. */
- /* Allocate the next queue slot */
- ev->link = ev->sequence = eq->eq_enq_seq++;
- /* NB we don't support START events yet and we don't create a separate
- * UNLINK event unless an explicit unlink succeeds, so the link
- * sequence is pretty useless */
--
- LASSERT(sizeof (ptl_event_t) ==
- offsetof(ptl_event_t, sequence) + sizeof(ev->sequence));
- /* We don't support different uid/jids yet */
- ev->uid = 0;
- ev->jid = 0;
-
- /* size must be a power of 2 to handle sequence # overflow */
- LASSERT (eq->eq_size != 0 &&
- eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
- eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
--
- rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev,
- offsetof (ptl_event_t, sequence));
- LASSERT (rc == PTL_OK);
- /* There is no race since both event consumers and event producers
- * take the LIB_LOCK(), so we don't screw around with memory
- * barriers, setting the sequence number last or wierd structure
- * layout assertions. */
- *eq_slot = *ev;
--
- #ifdef __KERNEL__
- barrier();
- #endif
- /* Updating the sequence number is what makes the event 'new' NB if
- * the cb_write below isn't atomic, this could cause a race with
- * PtlEQGet */
- rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence,
- (void *)&ev->sequence,sizeof (ev->sequence));
- LASSERT (rc == PTL_OK);
- /* Call the callback handler (if any) */
- if (eq->eq_callback != NULL)
- eq->eq_callback (eq_slot);
--
- /* Wake anyone sleeping for an event (see lib-eq.c) */
--#ifdef __KERNEL__
- barrier();
- if (waitqueue_active(&nal->libnal_ni.ni_waitq))
- wake_up_all(&nal->libnal_ni.ni_waitq);
-#else
- pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
--#endif
-
- if (nal->cb_callback != NULL)
- nal->cb_callback(nal, private, eq, ev);
- else if (eq->event_callback != NULL)
- eq->event_callback(ev);
--}
--
--void
- lib_finalize(nal_cb_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
-lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
--{
-- lib_md_t *md;
-- int unlink;
-- unsigned long flags;
-- int rc;
-- ptl_hdr_t ack;
-
- /* ni went down while processing this message */
- if (nal->ni.up == 0)
- return;
--
-- if (msg == NULL)
-- return;
--
-- /* Only send an ACK if the PUT completed successfully */
-- if (status == PTL_OK &&
-- !ptl_is_wire_handle_none(&msg->ack_wmd)) {
--
- LASSERT(msg->ev.type == PTL_EVENT_PUT);
- LASSERT(msg->ev.type == PTL_EVENT_PUT_END);
--
-- memset (&ack, 0, sizeof (ack));
- ack.type = HTON__u32 (PTL_MSG_ACK);
- ack.dest_nid = HTON__u64 (msg->ev.initiator.nid);
- ack.src_nid = HTON__u64 (nal->ni.nid);
- ack.dest_pid = HTON__u32 (msg->ev.initiator.pid);
- ack.src_pid = HTON__u32 (nal->ni.pid);
- ack.type = cpu_to_le32(PTL_MSG_ACK);
- ack.dest_nid = cpu_to_le64(msg->ev.initiator.nid);
- ack.dest_pid = cpu_to_le32(msg->ev.initiator.pid);
- ack.src_nid = cpu_to_le64(nal->libnal_ni.ni_pid.nid);
- ack.src_pid = cpu_to_le32(nal->libnal_ni.ni_pid.pid);
-- ack.payload_length = 0;
--
-- ack.msg.ack.dst_wmd = msg->ack_wmd;
-- ack.msg.ack.match_bits = msg->ev.match_bits;
- ack.msg.ack.mlength = HTON__u32 (msg->ev.mlength);
- ack.msg.ack.mlength = cpu_to_le32(msg->ev.mlength);
--
-- rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK,
-- msg->ev.initiator.nid, msg->ev.initiator.pid,
-- NULL, 0, 0);
-- if (rc != PTL_OK) {
-- /* send failed: there's nothing else to clean up. */
-- CERROR("Error %d sending ACK to "LPX64"\n",
-- rc, msg->ev.initiator.nid);
-- }
-- }
--
-- md = msg->md;
--
- state_lock(nal, &flags);
- LIB_LOCK(nal, flags);
--
-- /* Now it's safe to drop my caller's ref */
-- md->pending--;
-- LASSERT (md->pending >= 0);
--
-- /* Should I unlink this MD? */
- unlink = (md->pending == 0 && /* No other refs */
- (md->threshold == 0 || /* All ops done */
- md->md_flags & PTL_MD_FLAG_UNLINK) != 0); /* black spot */
- if (md->pending != 0) /* other refs */
- unlink = 0;
- else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0)
- unlink = 1;
- else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0)
- unlink = 0;
- else
- unlink = lib_md_exhausted(md);
--
- msg->ev.status = status;
- msg->ev.ni_fail_type = status;
-- msg->ev.unlinked = unlink;
--
-- if (md->eq != NULL)
-- lib_enq_event_locked(nal, private, md->eq, &msg->ev);
--
-- if (unlink)
-- lib_md_unlink(nal, md);
--
-- list_del (&msg->msg_list);
- nal->ni.counters.msgs_alloc--;
- nal->libnal_ni.ni_counters.msgs_alloc--;
-- lib_msg_free(nal, msg);
--
- state_unlock(nal, &flags);
- LIB_UNLOCK(nal, flags);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * lib/lib-ni.c
-- * Network status registers and distance functions.
-- *
-- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
-- * Copyright (c) 2001-2002 Sandia National Laboratories
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#define DEBUG_SUBSYSTEM S_PORTALS
--#include <portals/lib-p30.h>
- #include <portals/arg-blocks.h>
--
--#define MAX_DIST 18446744073709551615ULL
-
- int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
- {
- PtlNIDebug_in *args = v_args;
- PtlNIDebug_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
-
- ret->rc = ni->debug;
- ni->debug = args->mask_in;
-
- return 0;
- }
--
- int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx,
- ptl_sr_value_t *status)
--{
- /*
- * Incoming:
- * ptl_handle_ni_t interface_in
- * ptl_sr_index_t register_in
- *
- * Outgoing:
- * ptl_sr_value_t * status_out
- */
-
- PtlNIStatus_in *args = v_args;
- PtlNIStatus_out *ret = v_ret;
- lib_ni_t *ni = &nal->ni;
- lib_counters_t *count = &ni->counters;
-
- if (!args)
- return ret->rc = PTL_SEGV;
-
- ret->rc = PTL_OK;
- ret->status_out = 0;
-
- /*
- * I hate this sort of code.... Hash tables, offset lists?
- * Treat the counters as an array of ints?
- */
- if (args->register_in == PTL_SR_DROP_COUNT)
- ret->status_out = count->drop_count;
-
- else if (args->register_in == PTL_SR_DROP_LENGTH)
- ret->status_out = count->drop_length;
-
- else if (args->register_in == PTL_SR_RECV_COUNT)
- ret->status_out = count->recv_count;
-
- else if (args->register_in == PTL_SR_RECV_LENGTH)
- ret->status_out = count->recv_length;
-
- else if (args->register_in == PTL_SR_SEND_COUNT)
- ret->status_out = count->send_count;
-
- else if (args->register_in == PTL_SR_SEND_LENGTH)
- ret->status_out = count->send_length;
-
- else if (args->register_in == PTL_SR_MSGS_MAX)
- ret->status_out = count->msgs_max;
- else
- ret->rc = PTL_INV_SR_INDX;
- lib_nal_t *nal = apinal->nal_data;
- lib_ni_t *ni = &nal->libnal_ni;
- lib_counters_t *count = &ni->ni_counters;
--
- return ret->rc;
- switch (sr_idx) {
- case PTL_SR_DROP_COUNT:
- *status = count->drop_count;
- return PTL_OK;
- case PTL_SR_DROP_LENGTH:
- *status = count->drop_length;
- return PTL_OK;
- case PTL_SR_RECV_COUNT:
- *status = count->recv_count;
- return PTL_OK;
- case PTL_SR_RECV_LENGTH:
- *status = count->recv_length;
- return PTL_OK;
- case PTL_SR_SEND_COUNT:
- *status = count->send_count;
- return PTL_OK;
- case PTL_SR_SEND_LENGTH:
- *status = count->send_length;
- return PTL_OK;
- case PTL_SR_MSGS_MAX:
- *status = count->msgs_max;
- return PTL_OK;
- default:
- *status = 0;
- return PTL_SR_INDEX_INVALID;
- }
--}
--
--
- int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret)
-int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist)
--{
- /*
- * Incoming:
- * ptl_handle_ni_t interface_in
- * ptl_process_id_t process_in
-
- *
- * Outgoing:
- * unsigned long * distance_out
-
- */
-
- PtlNIDist_in *args = v_args;
- PtlNIDist_out *ret = v_ret;
-
- unsigned long dist;
- ptl_process_id_t id_in = args->process_in;
- ptl_nid_t nid;
- int rc;
-
- nid = id_in.nid;
-
- if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) {
- ret->distance_out = (unsigned long) MAX_DIST;
- return PTL_INV_PROC;
- }
-
- ret->distance_out = dist;
- lib_nal_t *nal = apinal->nal_data;
--
- return ret->rc = PTL_OK;
- return (nal->libnal_dist(nal, pid->nid, dist));
--}
+++ /dev/null
--Makefile
--Makefile.in
--aclocal.m4
--config.log
--config.status
--config.cache
--configure
--portals.spec
+++ /dev/null
--# Copyright (C) 2002 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--EXTRA_DIST = portals.spec
+++ /dev/null
--%define kversion @RELEASE@
--%define linuxdir @LINUX@
--%define version HEAD
--
--Summary: Sandia Portals Message Passing - utilities
--Name: portals
--Version: %{version}
--Release: 0210101748uml
--Copyright: LGPL
--Group: Utilities/System
--BuildRoot: /var/tmp/portals-%{version}-root
--Source: http://sandiaportals.org/portals-%{version}.tar.gz
--
--%description
--Sandia Portals message passing package. Contains kernel modules, libraries and utilities.
--
--%package -n portals-modules
--Summary: Kernel modules and NAL's for portals
--Group: Development/Kernel
--
--%description -n portals-modules
--Object-Based Disk storage drivers for Linux %{kversion}.
--
--%package -n portals-source
--Summary: Portals kernel source for rebuilding with other kernels
--Group: Development/Kernel
--
--%description -n portals-source
--Portals kernel source for rebuilding with other kernels
--
--%prep
--%setup -n portals-%{version}
--
--%build
--rm -rf $RPM_BUILD_ROOT
--
--# Create the pristine source directory.
--srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version}
--mkdir -p $srcdir
--find . -name CVS -prune -o -print | cpio -ap $srcdir
--
--# Set an explicit path to our Linux tree, if we can.
--conf_flag=
--linuxdir=%{linuxdir}
--test -d $linuxdir && conf_flag=--with-linux=$linuxdir
--./configure $conf_flag
--make
--
--%install
--make install prefix=$RPM_BUILD_ROOT
--
--%ifarch alpha
--# this hurts me
-- conf_flag=
-- linuxdir=%{linuxdir}
-- test -d $linuxdir && conf_flag=--with-linux=$linuxdir
-- make clean
-- ./configure --enable-rtscts-myrinet $conf_flag
-- make
-- cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o
-- cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload
--%endif
--
--
--%files
--%attr(-, root, root) %doc COPYING
--%attr(-, root, root) /usr/sbin/acceptor
--%attr(-, root, root) /usr/sbin/ptlctl
--%attr(-, root, root) /usr/sbin/debugctl
--%ifarch alpha
--%attr(-, root, root) /usr/sbin/mcpload
--%endif
--%attr(-, root, root) /lib/libmyrnal.a
--%attr(-, root, root) /lib/libptlapi.a
--%attr(-, root, root) /lib/libptlctl.a
--%attr(-, root, root) /lib/libprocbridge.a
--%attr(-, root, root) /lib/libptllib.a
--%attr(-, root, root) /lib/libtcpnal.a
--%attr(-, root, root) /lib/libtcpnalutil.a
--%attr(-, root, root) /usr/include/portals/*.h
--%attr(-, root, root) /usr/include/portals/base/*.h
--%attr(-, root, root) /usr/include/linux/*.h
--
--%files -n portals-modules
--%attr(-, root, root) %doc COPYING
--%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/portals.o
--%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o
--%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o
--%ifarch alpha
--%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o
--%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o
--%endif
--%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o
--
--%files -n portals-source
--%attr(-, root, root) /usr/src/portals-%{version}
--
--%post
--if [ ! -e /dev/portals ]; then
-- mknod /dev/portals c 10 240
--fi
--depmod -ae || exit 0
--
--grep -q portals /etc/modules.conf || \
-- echo 'alias char-major-10-240 portals' >> /etc/modules.conf
--
--grep -q '/dev/portals' /etc/modules.conf || \
-- echo 'alias /dev/portals portals' >> /etc/modules.conf
--
--%postun
--depmod -ae || exit 0
--
--%clean
--#rm -rf $RPM_BUILD_ROOT
--
--# end of file
+++ /dev/null
--.deps
--Makefile
--.*.cmd
--autoMakefile.in
--autoMakefile
--*.ko
--*.mod.c
--.*.flags
--.tmp_versions
--.depend
+++ /dev/null
--MODULES := kptlrouter
--kptlrouter-objs := router.o proc.o
--
--@INCLUDE_RULES@
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--include $(src)/../Kernelenv
--
--obj-y += kptlrouter.o
--kptlrouter-objs := router.o proc.o
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--if MODULES
--if !CRAY_PORTALS
--modulenet_DATA = kptlrouter$(KMODEXT)
--endif
--endif
--
--MOSTLYCLEANFILES = *.o *.ko *.mod.c
--DIST_SOURCES = $(kptlrouter-objs:%.o=%.c) router.h
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals
-- * http://sourceforge.net/projects/sandiaportals/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#include "router.h"
--
--#define KPR_PROC_ROUTER "sys/portals/router"
--#define KPR_PROC_ROUTES "sys/portals/routes"
--
--/* Used for multi-page route list book keeping */
--struct proc_route_data {
-- struct list_head *curr;
-- unsigned int generation;
-- off_t skip;
--} kpr_read_routes_data;
--
--/* nal2name support re-used from utils/portals.c */
--struct name2num {
-- char *name;
-- int num;
--} nalnames[] = {
-- { "any", 0},
-- { "elan", QSWNAL},
-- { "tcp", SOCKNAL},
-- { "gm", GMNAL},
- { "ib", IBNAL},
- { "ib", OPENIBNAL},
-- { NULL, -1}
--};
--
--static struct name2num *name2num_lookup_num(struct name2num *table, int num)
--{
-- while (table->name != NULL)
-- if (num == table->num)
-- return (table);
-- else
-- table++;
-- return (NULL);
--}
--
--static char *nal2name(int nal)
--{
-- struct name2num *e = name2num_lookup_num(nalnames, nal);
-- return ((e == NULL) ? "???" : e->name);
--}
--
--
--static int kpr_proc_router_read(char *page, char **start, off_t off,
-- int count, int *eof, void *data)
--{
-- unsigned long long bytes = kpr_fwd_bytes;
-- unsigned long packets = kpr_fwd_packets;
-- unsigned long errors = kpr_fwd_errors;
-- unsigned int qdepth = atomic_read (&kpr_queue_depth);
-- int len;
--
-- *eof = 1;
-- if (off != 0)
-- return (0);
--
-- len = sprintf(page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth);
--
-- *start = page;
-- return (len);
--}
--
--static int kpr_proc_router_write(struct file *file, const char *ubuffer,
-- unsigned long count, void *data)
--{
-- /* Ignore what we've been asked to write, and just zero the stats */
-- kpr_fwd_bytes = 0;
-- kpr_fwd_packets = 0;
-- kpr_fwd_errors = 0;
--
-- return (count);
--}
--
--static int kpr_proc_routes_read(char *page, char **start, off_t off,
-- int count, int *eof, void *data)
--{
-- struct proc_route_data *prd = data;
-- kpr_route_entry_t *re;
-- kpr_gateway_entry_t *ge;
-- int chunk_len = 0;
-- int line_len = 0;
-- int user_len = 0;
--
-- *eof = 1;
-- *start = page;
--
-- if (prd->curr == NULL) {
-- if (off != 0)
-- return 0;
--
-- /* First pass, initialize our private data */
-- prd->curr = kpr_routes.next;
-- prd->generation = kpr_routes_generation;
-- prd->skip = 0;
-- } else {
-- /* Abort route list generation change */
-- if (prd->generation != kpr_routes_generation) {
-- prd->curr = NULL;
-- return sprintf(page, "\nError: Routes Changed\n");
-- }
--
-- /* All the routes have been walked */
-- if (prd->curr == &kpr_routes) {
-- prd->curr = NULL;
-- return 0;
-- }
-- }
--
-- read_lock(&kpr_rwlock);
-- *start = page + prd->skip;
-- user_len = -prd->skip;
--
- while ((prd->curr != NULL) && (prd->curr != &kpr_routes)) {
- for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) {
-- re = list_entry(prd->curr, kpr_route_entry_t, kpre_list);
-- ge = re->kpre_gateway;
--
-- line_len = sprintf(page + chunk_len,
-- "%12s "LPX64" : "LPX64" - "LPX64", %s\n",
-- nal2name(ge->kpge_nalid), ge->kpge_nid,
-- re->kpre_lo_nid, re->kpre_hi_nid,
-- ge->kpge_alive ? "up" : "down");
-- chunk_len += line_len;
-- user_len += line_len;
-
- /* Abort the route list changed */
- if (prd->curr->next == NULL) {
- prd->curr = NULL;
- read_unlock(&kpr_rwlock);
- return sprintf(page, "\nError: Routes Changed\n");
- }
-
- prd->curr = prd->curr->next;
--
- /* The route table will exceed one page, break the while loop
- * so the function can be re-called with a new page.
- */
- if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count))
- /* The route table will exceed one page */
- if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) {
- prd->curr = prd->curr->next;
-- break;
- }
-- }
--
-- *eof = 0;
--
-- /* Caller received only a portion of the last entry, the
-- * remaining will be delivered in the next page if asked for.
-- */
-- if (user_len > count) {
-- prd->curr = prd->curr->prev;
-- prd->skip = line_len - (user_len - count);
-- read_unlock(&kpr_rwlock);
-- return count;
-- }
--
-- /* Not enough data to entirely satify callers request */
-- prd->skip = 0;
-- read_unlock(&kpr_rwlock);
-- return user_len;
--}
--
--static int kpr_proc_routes_write(struct file *file, const char *ubuffer,
-- unsigned long count, void *data)
--{
-- /* no-op; lctl should be used to adjust the routes */
-- return (count);
--}
--
--void kpr_proc_init(void)
--{
-- struct proc_dir_entry *router_entry;
-- struct proc_dir_entry *routes_entry;
--
-- /* Initialize KPR_PROC_ROUTER */
-- router_entry = create_proc_entry (KPR_PROC_ROUTER,
-- S_IFREG | S_IRUGO | S_IWUSR, NULL);
--
-- if (router_entry == NULL) {
-- CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER);
-- return;
-- }
--
-- router_entry->data = NULL;
-- router_entry->read_proc = kpr_proc_router_read;
-- router_entry->write_proc = kpr_proc_router_write;
--
-- /* Initialize KPR_PROC_ROUTES */
-- routes_entry = create_proc_entry (KPR_PROC_ROUTES,
-- S_IFREG | S_IRUGO | S_IWUSR, NULL);
--
-- if (routes_entry == NULL) {
-- CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES);
-- return;
-- }
--
-- kpr_read_routes_data.curr = NULL;
-- kpr_read_routes_data.generation = 0;
-- kpr_read_routes_data.skip = 0;
--
-- routes_entry->data = &kpr_read_routes_data;
-- routes_entry->read_proc = kpr_proc_routes_read;
-- routes_entry->write_proc = kpr_proc_routes_write;
--}
--
--void kpr_proc_fini(void)
--{
-- remove_proc_entry(KPR_PROC_ROUTER, 0);
-- remove_proc_entry(KPR_PROC_ROUTES, 0);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals
-- * http://sourceforge.net/projects/sandiaportals/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#include "router.h"
--
--LIST_HEAD(kpr_routes);
--LIST_HEAD(kpr_gateways);
--LIST_HEAD(kpr_nals);
--
--unsigned int kpr_routes_generation;
--unsigned long long kpr_fwd_bytes;
--unsigned long kpr_fwd_packets;
--unsigned long kpr_fwd_errors;
--atomic_t kpr_queue_depth;
--
--/* Mostly the tables are read-only (thread and interrupt context)
-- *
-- * Once in a blue moon we register/deregister NALs and add/remove routing
-- * entries (thread context only)... */
--rwlock_t kpr_rwlock = RW_LOCK_UNLOCKED;
--
--kpr_router_interface_t kpr_router_interface = {
-- kprri_register: kpr_register_nal,
-- kprri_lookup: kpr_lookup_target,
-- kprri_fwd_start: kpr_forward_packet,
-- kprri_fwd_done: kpr_complete_packet,
-- kprri_notify: kpr_nal_notify,
-- kprri_shutdown: kpr_shutdown_nal,
-- kprri_deregister: kpr_deregister_nal,
- };
-
- kpr_control_interface_t kpr_control_interface = {
- kprci_add_route: kpr_add_route,
- kprci_del_route: kpr_del_route,
- kprci_get_route: kpr_get_route,
- kprci_notify: kpr_sys_notify,
--};
--
--int
--kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
--{
-- unsigned long flags;
-- struct list_head *e;
-- kpr_nal_entry_t *ne;
--
-- CDEBUG (D_NET, "Registering NAL %d\n", nalif->kprni_nalid);
--
-- PORTAL_ALLOC (ne, sizeof (*ne));
-- if (ne == NULL)
-- return (-ENOMEM);
--
-- memset (ne, 0, sizeof (*ne));
-- memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif));
--
-- LASSERT (!in_interrupt());
-- write_lock_irqsave (&kpr_rwlock, flags);
--
-- for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
-- {
-- kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list);
--
-- if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid)
-- {
-- write_unlock_irqrestore (&kpr_rwlock, flags);
--
-- CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid);
--
-- PORTAL_FREE (ne, sizeof (*ne));
-- return (-EEXIST);
-- }
-- }
--
-- list_add (&ne->kpne_list, &kpr_nals);
--
-- write_unlock_irqrestore (&kpr_rwlock, flags);
--
-- *argp = ne;
-- PORTAL_MODULE_USE;
-- return (0);
--}
--
--void
--kpr_do_upcall (void *arg)
--{
-- kpr_upcall_t *u = (kpr_upcall_t *)arg;
-- char nalstr[10];
-- char nidstr[36];
-- char whenstr[36];
-- char *argv[] = {
-- NULL,
-- "ROUTER_NOTIFY",
-- nalstr,
-- nidstr,
-- u->kpru_alive ? "up" : "down",
-- whenstr,
-- NULL};
--
-- snprintf (nalstr, sizeof(nalstr), "%d", u->kpru_nal_id);
-- snprintf (nidstr, sizeof(nidstr), LPX64, u->kpru_nid);
-- snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
--
-- portals_run_upcall (argv);
--
-- kfree (u);
--}
--
--void
--kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when)
--{
-- char str[PTL_NALFMT_SIZE];
--
-- /* May be in arbitrary context */
-- kpr_upcall_t *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC);
--
-- if (u == NULL) {
-- CERROR ("Upcall out of memory: nal %d nid "LPX64" (%s) %s\n",
-- gw_nalid, gw_nid,
-- portals_nid2str(gw_nalid, gw_nid, str),
-- alive ? "up" : "down");
-- return;
-- }
--
-- u->kpru_nal_id = gw_nalid;
-- u->kpru_nid = gw_nid;
-- u->kpru_alive = alive;
-- u->kpru_when = when;
--
-- prepare_work (&u->kpru_tq, kpr_do_upcall, u);
-- schedule_work (&u->kpru_tq);
--}
--
--int
--kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
-- int alive, time_t when)
--{
-- unsigned long flags;
-- int found;
-- kpr_nal_entry_t *ne = NULL;
-- kpr_gateway_entry_t *ge = NULL;
-- struct timeval now;
-- struct list_head *e;
-- struct list_head *n;
-- char str[PTL_NALFMT_SIZE];
--
-- CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n",
-- byNal ? "NAL" : "userspace",
-- gateway_nalid, gateway_nid, alive ? "up" : "down");
--
-- /* can't do predictions... */
-- do_gettimeofday (&now);
-- if (when > now.tv_sec) {
-- CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s "
-- "%ld seconds in the future\n",
-- byNal ? "NAL" : "userspace",
-- gateway_nalid, gateway_nid,
-- alive ? "up" : "down",
-- when - now.tv_sec);
-- return (EINVAL);
-- }
--
-- LASSERT (when <= now.tv_sec);
--
-- /* Serialise with lookups (i.e. write lock) */
-- write_lock_irqsave(&kpr_rwlock, flags);
--
-- found = 0;
-- list_for_each_safe (e, n, &kpr_gateways) {
--
-- ge = list_entry(e, kpr_gateway_entry_t, kpge_list);
-- if ((gateway_nalid != 0 &&
-- ge->kpge_nalid != gateway_nalid) ||
-- ge->kpge_nid != gateway_nid)
-- continue;
--
-- found = 1;
-- break;
-- }
--
-- if (!found) {
-- /* gateway not found */
-- write_unlock_irqrestore(&kpr_rwlock, flags);
-- CDEBUG (D_NET, "Gateway not found\n");
-- return (0);
-- }
--
-- if (when < ge->kpge_timestamp) {
-- /* out of date information */
-- write_unlock_irqrestore (&kpr_rwlock, flags);
-- CDEBUG (D_NET, "Out of date\n");
-- return (0);
-- }
--
-- /* update timestamp */
-- ge->kpge_timestamp = when;
--
-- if ((!ge->kpge_alive) == (!alive)) {
-- /* new date for old news */
-- write_unlock_irqrestore (&kpr_rwlock, flags);
-- CDEBUG (D_NET, "Old news\n");
-- return (0);
-- }
--
-- ge->kpge_alive = alive;
-- CDEBUG(D_NET, "set "LPX64" [%p] %d\n", gateway_nid, ge, alive);
--
-- if (alive) {
-- /* Reset all gateway weights so the newly-enabled gateway
-- * doesn't have to play catch-up */
-- list_for_each_safe (e, n, &kpr_gateways) {
-- kpr_gateway_entry_t *ge = list_entry(e, kpr_gateway_entry_t,
-- kpge_list);
-- atomic_set (&ge->kpge_weight, 0);
-- }
-- }
--
-- found = 0;
-- if (!byNal) {
-- /* userland notified me: notify NAL? */
-- ne = kpr_find_nal_entry_locked (ge->kpge_nalid);
-- if (ne != NULL) {
-- if (!ne->kpne_shutdown &&
-- ne->kpne_interface.kprni_notify != NULL) {
-- /* take a ref on this NAL until notifying
-- * it has completed... */
-- atomic_inc (&ne->kpne_refcount);
-- found = 1;
-- }
-- }
-- }
--
-- write_unlock_irqrestore(&kpr_rwlock, flags);
--
-- if (found) {
-- ne->kpne_interface.kprni_notify (ne->kpne_interface.kprni_arg,
-- gateway_nid, alive);
-- /* 'ne' can disappear now... */
-- atomic_dec (&ne->kpne_refcount);
-- }
--
-- if (byNal) {
-- /* It wasn't userland that notified me... */
-- CWARN ("Upcall: NAL %d NID "LPX64" (%s) is %s\n",
-- gateway_nalid, gateway_nid,
-- portals_nid2str(gateway_nalid, gateway_nid, str),
-- alive ? "alive" : "dead");
-- kpr_upcall (gateway_nalid, gateway_nid, alive, when);
-- } else {
-- CDEBUG (D_NET, " NOT Doing upcall\n");
-- }
--
-- return (0);
--}
--
--void
--kpr_nal_notify (void *arg, ptl_nid_t peer, int alive, time_t when)
--{
-- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
--
-- kpr_do_notify (1, ne->kpne_interface.kprni_nalid, peer, alive, when);
--}
--
--void
--kpr_shutdown_nal (void *arg)
--{
-- unsigned long flags;
-- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
--
-- CDEBUG (D_NET, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
--
-- LASSERT (!ne->kpne_shutdown);
-- LASSERT (!in_interrupt());
--
- write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */
- write_lock_irqsave (&kpr_rwlock, flags);
-- ne->kpne_shutdown = 1;
- write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */
-
- while (atomic_read (&ne->kpne_refcount) != 0)
- {
- CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
- ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
-
- set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
- }
- write_unlock_irqrestore (&kpr_rwlock, flags);
--}
--
--void
--kpr_deregister_nal (void *arg)
--{
-- unsigned long flags;
-- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
--
-- CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
--
-- LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */
- LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */
-- LASSERT (!in_interrupt());
--
-- write_lock_irqsave (&kpr_rwlock, flags);
-
-- list_del (&ne->kpne_list);
-
-- write_unlock_irqrestore (&kpr_rwlock, flags);
-
- /* Wait until all outstanding messages/notifications have completed */
- while (atomic_read (&ne->kpne_refcount) != 0)
- {
- CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
- ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
-
- set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
- }
--
-- PORTAL_FREE (ne, sizeof (*ne));
-- PORTAL_MODULE_UNUSE;
--}
--
--int
--kpr_ge_isbetter (kpr_gateway_entry_t *ge1, kpr_gateway_entry_t *ge2)
--{
-- const int significant_bits = 0x00ffffff;
-- /* We use atomic_t to record/compare route weights for
-- * load-balancing. Here we limit ourselves to only using
-- * 'significant_bits' when we do an 'after' comparison */
--
-- int diff = (atomic_read (&ge1->kpge_weight) -
-- atomic_read (&ge2->kpge_weight)) & significant_bits;
-- int rc = (diff > (significant_bits >> 1));
--
-- CDEBUG(D_NET, "[%p]"LPX64"=%d %s [%p]"LPX64"=%d\n",
-- ge1, ge1->kpge_nid, atomic_read (&ge1->kpge_weight),
-- rc ? ">" : "<",
-- ge2, ge2->kpge_nid, atomic_read (&ge2->kpge_weight));
--
-- return (rc);
--}
--
--void
--kpr_update_weight (kpr_gateway_entry_t *ge, int nob)
--{
-- int weight = 1 + (nob + sizeof (ptl_hdr_t)/2)/sizeof (ptl_hdr_t);
--
-- /* We've chosen this route entry (i.e. gateway) to forward payload
-- * of length 'nob'; update the route's weight to make it less
-- * favoured. Note that the weight is 1 plus the payload size
-- * rounded and scaled to the portals header size, so we get better
-- * use of the significant bits in kpge_weight. */
--
-- CDEBUG(D_NET, "gateway [%p]"LPX64" += %d\n", ge,
-- ge->kpge_nid, weight);
--
-- atomic_add (weight, &ge->kpge_weight);
--}
--
--int
--kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob,
-- ptl_nid_t *gateway_nidp)
--{
-- kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
-- struct list_head *e;
-- kpr_route_entry_t *re;
-- kpr_gateway_entry_t *ge = NULL;
-- int rc = -ENOENT;
--
-- /* Caller wants to know if 'target_nid' can be reached via a gateway
-- * ON HER OWN NETWORK */
--
-- CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid,
-- ne->kpne_interface.kprni_nalid);
-
- if (ne->kpne_shutdown) /* caller is shutting down */
- return (-ENOENT);
- LASSERT (!in_interrupt());
--
-- read_lock (&kpr_rwlock);
-
- if (ne->kpne_shutdown) { /* caller is shutting down */
- read_unlock (&kpr_rwlock);
- return (-ENOENT);
- }
--
-- /* Search routes for one that has a gateway to target_nid on the callers network */
--
-- list_for_each (e, &kpr_routes) {
-- re = list_entry (e, kpr_route_entry_t, kpre_list);
--
-- if (re->kpre_lo_nid > target_nid ||
-- re->kpre_hi_nid < target_nid)
-- continue;
--
-- /* found table entry */
--
-- if (re->kpre_gateway->kpge_nalid != ne->kpne_interface.kprni_nalid ||
-- !re->kpre_gateway->kpge_alive) {
-- /* different NAL or gateway down */
-- rc = -EHOSTUNREACH;
-- continue;
-- }
--
-- if (ge == NULL ||
-- kpr_ge_isbetter (re->kpre_gateway, ge))
-- ge = re->kpre_gateway;
-- }
--
-- if (ge != NULL) {
-- kpr_update_weight (ge, nob);
-- *gateway_nidp = ge->kpge_nid;
-- rc = 0;
-- }
--
-- read_unlock (&kpr_rwlock);
--
-- /* NB can't deref 're' now; it might have been removed! */
--
-- CDEBUG (D_NET, "lookup "LPX64" from NAL %d: %d ("LPX64")\n",
-- target_nid, ne->kpne_interface.kprni_nalid, rc,
-- (rc == 0) ? *gateway_nidp : (ptl_nid_t)0);
-- return (rc);
--}
--
--kpr_nal_entry_t *
--kpr_find_nal_entry_locked (int nal_id)
--{
-- struct list_head *e;
--
-- /* Called with kpr_rwlock held */
--
-- list_for_each (e, &kpr_nals) {
-- kpr_nal_entry_t *ne = list_entry (e, kpr_nal_entry_t, kpne_list);
--
-- if (nal_id != ne->kpne_interface.kprni_nalid) /* no match */
-- continue;
--
-- return (ne);
-- }
--
-- return (NULL);
--}
--
--void
--kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
--{
-- kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)arg;
-- ptl_nid_t target_nid = fwd->kprfd_target_nid;
-- int nob = fwd->kprfd_nob;
-- kpr_gateway_entry_t *ge = NULL;
-- kpr_nal_entry_t *dst_ne = NULL;
-- struct list_head *e;
-- kpr_route_entry_t *re;
-- kpr_nal_entry_t *tmp_ne;
- int rc;
--
-- CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
-- target_nid, src_ne->kpne_interface.kprni_nalid);
--
-- LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
-
- atomic_inc (&kpr_queue_depth);
- atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
- LASSERT (!in_interrupt());
-
- read_lock (&kpr_rwlock);
--
-- kpr_fwd_packets++; /* (loose) stats accounting */
-- kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
--
- if (src_ne->kpne_shutdown) /* caller is shutting down */
- if (src_ne->kpne_shutdown) { /* caller is shutting down */
- rc = -ESHUTDOWN;
-- goto out;
- }
--
-- fwd->kprfd_router_arg = src_ne; /* stash caller's nal entry */
-
- read_lock (&kpr_rwlock);
--
-- /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
--
-- list_for_each (e, &kpr_routes) {
-- re = list_entry (e, kpr_route_entry_t, kpre_list);
--
-- if (re->kpre_lo_nid > target_nid || /* no match */
-- re->kpre_hi_nid < target_nid)
-- continue;
--
-- if (re->kpre_gateway->kpge_nalid == src_ne->kpne_interface.kprni_nalid)
-- continue; /* don't route to same NAL */
--
-- if (!re->kpre_gateway->kpge_alive)
-- continue; /* gateway is dead */
--
-- tmp_ne = kpr_find_nal_entry_locked (re->kpre_gateway->kpge_nalid);
--
-- if (tmp_ne == NULL ||
-- tmp_ne->kpne_shutdown) {
-- /* NAL must be registered and not shutting down */
-- continue;
-- }
--
-- if (ge == NULL ||
-- kpr_ge_isbetter (re->kpre_gateway, ge)) {
-- ge = re->kpre_gateway;
-- dst_ne = tmp_ne;
-- }
-- }
--
-- if (ge != NULL) {
-- LASSERT (dst_ne != NULL);
--
-- kpr_update_weight (ge, nob);
--
-- fwd->kprfd_gateway_nid = ge->kpge_nid;
- atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */
- atomic_inc (&src_ne->kpne_refcount); /* source and dest nals are */
- atomic_inc (&dst_ne->kpne_refcount); /* busy until fwd completes */
- atomic_inc (&kpr_queue_depth);
--
-- read_unlock (&kpr_rwlock);
--
-- CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d: "
-- "to "LPX64" on NAL %d\n",
-- fwd, target_nid, src_ne->kpne_interface.kprni_nalid,
-- fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
--
-- dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd);
-- return;
-- }
--
- read_unlock (&kpr_rwlock);
- rc = -EHOSTUNREACH;
-- out:
-- kpr_fwd_errors++;
--
- CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd,
- target_nid, src_ne->kpne_interface.kprni_nalid);
- CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d: %d\n",
- fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc);
--
- /* Can't find anywhere to forward to */
- (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH);
- (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc);
--
- atomic_dec (&kpr_queue_depth);
- atomic_dec (&src_ne->kpne_refcount);
- read_unlock (&kpr_rwlock);
--}
--
--void
--kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
--{
-- kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg;
-- kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
--
-- CDEBUG (D_NET, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd,
-- src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error);
--
-- atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR dst_ne can disappear now!!! */
--
-- (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
--
-- CDEBUG (D_NET, "complete(2) [%p] from NAL %d: %d\n", fwd,
-- src_ne->kpne_interface.kprni_nalid, error);
--
-- atomic_dec (&kpr_queue_depth);
-- atomic_dec (&src_ne->kpne_refcount); /* CAVEAT EMPTOR src_ne can disappear now!!! */
--}
--
--int
--kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid,
-- ptl_nid_t lo_nid, ptl_nid_t hi_nid)
--{
-- unsigned long flags;
-- struct list_head *e;
-- kpr_route_entry_t *re;
-- kpr_gateway_entry_t *ge;
-- int dup = 0;
--
-- CDEBUG(D_NET, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n",
-- gateway_nalid, gateway_nid, lo_nid, hi_nid);
--
-- if (gateway_nalid == PTL_NID_ANY ||
-- lo_nid == PTL_NID_ANY ||
-- hi_nid == PTL_NID_ANY ||
-- lo_nid > hi_nid)
-- return (-EINVAL);
--
-- PORTAL_ALLOC (ge, sizeof (*ge));
-- if (ge == NULL)
-- return (-ENOMEM);
--
-- ge->kpge_nalid = gateway_nalid;
-- ge->kpge_nid = gateway_nid;
-- ge->kpge_alive = 1;
-- ge->kpge_timestamp = 0;
-- ge->kpge_refcount = 0;
-- atomic_set (&ge->kpge_weight, 0);
--
-- PORTAL_ALLOC (re, sizeof (*re));
-- if (re == NULL) {
-- PORTAL_FREE (ge, sizeof (*ge));
-- return (-ENOMEM);
-- }
--
-- re->kpre_lo_nid = lo_nid;
-- re->kpre_hi_nid = hi_nid;
--
-- LASSERT(!in_interrupt());
-- write_lock_irqsave (&kpr_rwlock, flags);
--
-- list_for_each (e, &kpr_gateways) {
-- kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
-- kpge_list);
--
-- if (ge2->kpge_nalid == gateway_nalid &&
-- ge2->kpge_nid == gateway_nid) {
-- PORTAL_FREE (ge, sizeof (*ge));
-- ge = ge2;
-- dup = 1;
-- break;
-- }
-- }
--
-- if (!dup) {
-- /* Adding a new gateway... */
-- list_add (&ge->kpge_list, &kpr_gateways);
--
-- /* ...zero all gateway weights so this one doesn't have to
-- * play catch-up */
--
-- list_for_each (e, &kpr_gateways) {
-- kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
-- kpge_list);
-- atomic_set (&ge2->kpge_weight, 0);
-- }
-- }
--
-- re->kpre_gateway = ge;
-- ge->kpge_refcount++;
-- list_add (&re->kpre_list, &kpr_routes);
-- kpr_routes_generation++;
--
-- write_unlock_irqrestore (&kpr_rwlock, flags);
-- return (0);
--}
--
--int
--kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid,
- int alive, time_t when)
- int alive, time_t when)
--{
-- return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when));
--}
--
--int
--kpr_del_route (int gw_nalid, ptl_nid_t gw_nid,
-- ptl_nid_t lo, ptl_nid_t hi)
--{
-- int specific = (lo != PTL_NID_ANY);
-- unsigned long flags;
-- int rc = -ENOENT;
-- struct list_head *e;
-- struct list_head *n;
--
-- CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n",
-- gw_nalid, gw_nid, lo, hi);
--
-- LASSERT(!in_interrupt());
--
-- /* NB Caller may specify either all routes via the given gateway
-- * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are
-- * actual NIDs) */
-- if (specific ? (hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY))
-- return (-EINVAL);
--
-- write_lock_irqsave(&kpr_rwlock, flags);
--
-- list_for_each_safe (e, n, &kpr_routes) {
-- kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
-- kpre_list);
-- kpr_gateway_entry_t *ge = re->kpre_gateway;
--
-- if (ge->kpge_nalid != gw_nalid ||
-- ge->kpge_nid != gw_nid ||
-- (specific &&
-- (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid)))
-- continue;
--
-- rc = 0;
--
-- if (--ge->kpge_refcount == 0) {
-- list_del (&ge->kpge_list);
-- PORTAL_FREE (ge, sizeof (*ge));
-- }
--
-- list_del (&re->kpre_list);
-- PORTAL_FREE(re, sizeof (*re));
--
-- if (specific)
-- break;
-- }
--
-- kpr_routes_generation++;
-- write_unlock_irqrestore(&kpr_rwlock, flags);
--
-- return (rc);
--}
--
--int
- kpr_get_route (int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
- ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive)
-kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid,
- ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive)
--{
-- struct list_head *e;
--
- LASSERT (!in_interrupt());
-- read_lock(&kpr_rwlock);
--
-- for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
-- kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
-- kpre_list);
-- kpr_gateway_entry_t *ge = re->kpre_gateway;
--
-- if (idx-- == 0) {
-- *gateway_nalid = ge->kpge_nalid;
-- *gateway_nid = ge->kpge_nid;
-- *alive = ge->kpge_alive;
-- *lo_nid = re->kpre_lo_nid;
-- *hi_nid = re->kpre_hi_nid;
--
-- read_unlock(&kpr_rwlock);
-- return (0);
-- }
-- }
--
-- read_unlock (&kpr_rwlock);
-- return (-ENOENT);
-}
-
-static int
-kpr_nal_cmd(struct portals_cfg *pcfg, void * private)
-{
- int err = -EINVAL;
- ENTRY;
-
- switch(pcfg->pcfg_command) {
- default:
- CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
- break;
-
- case NAL_CMD_ADD_ROUTE:
- CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
-
- case NAL_CMD_DEL_ROUTE:
- CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_nid2, pcfg->pcfg_nid3);
- break;
-
- case NAL_CMD_NOTIFY_ROUTER: {
- CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
- pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags ? "Enabling" : "Disabling",
- (time_t)pcfg->pcfg_nid3);
-
- err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
- pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3);
- break;
- }
-
- case NAL_CMD_GET_ROUTE:
- CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
- err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
- &pcfg->pcfg_nid,
- &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
- &pcfg->pcfg_flags);
- break;
- }
- RETURN(err);
--}
-
--
--static void /*__exit*/
--kpr_finalise (void)
--{
-- LASSERT (list_empty (&kpr_nals));
-
- libcfs_nal_cmd_unregister(ROUTER);
-
- PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
-
- kpr_proc_fini();
--
-- while (!list_empty (&kpr_routes)) {
-- kpr_route_entry_t *re = list_entry(kpr_routes.next,
-- kpr_route_entry_t,
-- kpre_list);
--
-- list_del(&re->kpre_list);
-- PORTAL_FREE(re, sizeof (*re));
-- }
-
- kpr_proc_fini();
-
- PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
- PORTAL_SYMBOL_UNREGISTER(kpr_control_interface);
--
-- CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
-- atomic_read(&portal_kmemory));
--}
--
--static int __init
--kpr_initialise (void)
--{
- int rc;
-
-- CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
-- atomic_read(&portal_kmemory));
--
-- kpr_routes_generation = 0;
-- kpr_proc_init();
--
- rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL);
- if (rc != 0) {
- CERROR("Can't register nal cmd handler\n");
- return (rc);
- }
-
-- PORTAL_SYMBOL_REGISTER(kpr_router_interface);
- PORTAL_SYMBOL_REGISTER(kpr_control_interface);
-- return (0);
--}
--
--MODULE_AUTHOR("Eric Barton");
--MODULE_DESCRIPTION("Kernel Portals Router v0.01");
--MODULE_LICENSE("GPL");
--
--module_init (kpr_initialise);
--module_exit (kpr_finalise);
--
- EXPORT_SYMBOL (kpr_control_interface);
--EXPORT_SYMBOL (kpr_router_interface);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals
-- * http://sourceforge.net/projects/sandiaportals/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#ifndef _KPTLROUTER_H
--#define _KPTLROUTER_H
--#ifndef EXPORT_SYMTAB
--# define EXPORT_SYMTAB
--#endif
--
--#include <linux/config.h>
--#include <linux/module.h>
--#include <linux/kernel.h>
--#include <linux/mm.h>
--#include <linux/string.h>
--#include <linux/errno.h>
--#include <linux/proc_fs.h>
--#include <linux/init.h>
--
--#define DEBUG_SUBSYSTEM S_PTLROUTER
--
--#include <linux/kp30.h>
--#include <linux/kpr.h>
--#include <portals/p30.h>
--#include <portals/lib-p30.h>
--
--typedef struct
--{
-- struct list_head kpne_list;
-- kpr_nal_interface_t kpne_interface;
-- atomic_t kpne_refcount;
-- int kpne_shutdown;
--} kpr_nal_entry_t;
--
--typedef struct
--{
-- struct list_head kpge_list;
-- atomic_t kpge_weight;
-- time_t kpge_timestamp;
-- int kpge_alive;
-- int kpge_nalid;
-- int kpge_refcount;
-- ptl_nid_t kpge_nid;
--} kpr_gateway_entry_t;
--
--typedef struct
--{
-- struct list_head kpre_list;
-- kpr_gateway_entry_t *kpre_gateway;
-- ptl_nid_t kpre_lo_nid;
-- ptl_nid_t kpre_hi_nid;
--} kpr_route_entry_t;
--
--typedef struct
--{
-- work_struct_t kpru_tq;
-- int kpru_nal_id;
-- ptl_nid_t kpru_nid;
-- int kpru_alive;
-- time_t kpru_when;
--} kpr_upcall_t;
--
--extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp);
--extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob,
-- ptl_nid_t *gateway_nidp);
--extern kpr_nal_entry_t *kpr_find_nal_entry_locked (int nal_id);
--extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd);
--extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error);
--extern void kpr_nal_notify (void *arg, ptl_nid_t peer,
-- int alive, time_t when);
--extern void kpr_shutdown_nal (void *arg);
--extern void kpr_deregister_nal (void *arg);
--
--extern void kpr_proc_init (void);
--extern void kpr_proc_fini (void);
-
- extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid,
- ptl_nid_t lo_nid, ptl_nid_t hi_nid);
- extern int kpr_del_route (int gw_nal, ptl_nid_t gw_nid,
- ptl_nid_t lo, ptl_nid_t hi);
- extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid,
- ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, int *alive);
- extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid,
- int alive, time_t when);
--
--extern unsigned int kpr_routes_generation;
--extern unsigned long long kpr_fwd_bytes;
--extern unsigned long kpr_fwd_packets;
--extern unsigned long kpr_fwd_errors;
--extern atomic_t kpr_queue_depth;
-
--extern struct list_head kpr_routes;
--extern rwlock_t kpr_rwlock;
--
--#endif /* _KPLROUTER_H */
+++ /dev/null
--Makefile
--.deps
--.*.cmd
--autoMakefile.in
--autoMakefile
--*.ko
--*.mod.c
--.*.flags
--.tmp_versions
--.depend
+++ /dev/null
--MODULES := pingsrv pingcli spingsrv spingcli
--pingsrv-objs := ping_srv.o
--
--ifeq ($(PATCHLEVEL),6)
--pingcli-objs := ping_cli.o
--spingsrv-objs := sping_srv.o
--spingcli-objs := sping_cli.o
--else
--ping%.c: ping_%.c
-- ln -sf $< $@
--
--sping%.c: sping_%.c
-- ln -sf $< $@
--endif
--
--@INCLUDE_RULES@
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--include $(src)/../Kernelenv
--
--obj-y += ping_cli.o
--obj-y += ping_srv.o
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--if MODULES
--if !CRAY_PORTALS
--if TESTS
--noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT)
--noinst_DATA += spingsrv$(KMODEXT) spingcli$(KMODEXT)
--endif
--endif
--endif
--
--MOSTLYCLEANFILES = *.o *.ko *.mod.c pingsrv.c pingcli.c spingsrv.c spingcli.c
--DIST_SOURCES = ping_srv.c ping_cli.c sping_srv.c sping_cli.c ping.h
+++ /dev/null
--#ifndef _KPING_INCLUDED
--#define _KPING_INCLUDED
--
--#include <portals/p30.h>
--
--
--#define PTL_PING_IN_SIZE 256 // n packets per buffer
--#define PTL_PING_IN_BUFFERS 2 // n fallback buffers
--
--#define PTL_PING_CLIENT 4
--#define PTL_PING_SERVER 5
--
--#define PING_HEADER_MAGIC 0xDEADBEEF
--#define PING_BULK_MAGIC 0xCAFEBABE
--
--#define PING_HEAD_BITS 0x00000001
--#define PING_BULK_BITS 0x00000002
--#define PING_IGNORE_BITS 0xFFFFFFFC
--
--#define PTL_PING_ACK 0x01
--#define PTL_PING_VERBOSE 0x02
--#define PTL_PING_VERIFY 0x04
--#define PTL_PING_PREALLOC 0x08
--
--
--#define NEXT_PRIMARY_BUFFER(index) \
-- (((index + 1) >= PTL_PING_IN_BUFFERS) ? 0 : (index + 1))
--
--#define PDEBUG(str, err) \
-- CERROR ("%s: error=%s (%d)\n", str, ptl_err_str[err], err)
--
--
--/* Ping data to be passed via the ioctl to kernel space */
--
--#if __KERNEL__
--
--
--#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
--#include <linux/workqueue.h>
--#else
--#include <linux/tqueue.h>
--#endif
--struct pingsrv_data {
--
-- ptl_handle_ni_t ni;
-- ptl_handle_me_t me;
-- ptl_handle_eq_t eq;
-- void *in_buf;
-- ptl_process_id_t my_id;
-- ptl_process_id_t id_local;
-- ptl_md_t mdin;
-- ptl_md_t mdout;
-- ptl_handle_md_t mdin_h;
-- ptl_handle_md_t mdout_h;
-- ptl_event_t evnt;
-- struct task_struct *tsk;
--}; /* struct pingsrv_data */
--
--struct pingcli_data {
--
-- struct portal_ioctl_data *args;
-- ptl_handle_me_t me;
-- ptl_handle_eq_t eq;
-- char *inbuf;
-- char *outbuf;
-- ptl_process_id_t myid;
-- ptl_process_id_t id_local;
-- ptl_process_id_t id_remote;
-- ptl_md_t md_in_head;
-- ptl_md_t md_out_head;
-- ptl_handle_md_t md_in_head_h;
-- ptl_handle_md_t md_out_head_h;
-- ptl_event_t ev;
-- struct task_struct *tsk;
--}; /* struct pingcli_data */
--
--
--#endif /* __KERNEL__ */
--
--#endif /* _KPING_INCLUDED */
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
-- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
-- * Kedar Sovani (kedar@calsoftinc.com)
-- * Amey Inamdar (amey@calsoftinc.com)
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#define DEBUG_SUBSYSTEM S_PINGER
--
--#include <linux/kp30.h>
--#include <portals/p30.h>
--#include <linux/module.h>
--#include <linux/proc_fs.h>
--#include <linux/init.h>
--#include <linux/poll.h>
--#include "ping.h"
--/* int portal_debug = D_PING_CLI; */
--
--
--#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
--
--#define MAX_TIME 100000
--
--/* This should be enclosed in a structure */
--
--static struct pingcli_data *client = NULL;
--
--static int count = 0;
--
--static void
- pingcli_shutdown(int err)
-pingcli_shutdown(ptl_handle_ni_t nih, int err)
--{
-- int rc;
--
-- /* Yes, we are intentionally allowing us to fall through each
-- * case in to the next. This allows us to pass an error
-- * code to just clean up the right stuff.
-- */
-- switch (err) {
-- case 1:
-- /* Unlink any memory descriptors we may have used */
-- if ((rc = PtlMDUnlink (client->md_out_head_h)))
-- PDEBUG ("PtlMDUnlink", rc);
-- case 2:
-- if ((rc = PtlMDUnlink (client->md_in_head_h)))
-- PDEBUG ("PtlMDUnlink", rc);
--
-- /* Free the event queue */
-- if ((rc = PtlEQFree (client->eq)))
-- PDEBUG ("PtlEQFree", rc);
--
-- if ((rc = PtlMEUnlink (client->me)))
-- PDEBUG ("PtlMEUnlink", rc);
-- case 3:
- kportal_put_ni (client->args->ioc_nal);
- PtlNIFini(nih);
--
-- case 4:
-- /* Free our buffers */
--
-- if (client != NULL)
-- PORTAL_FREE (client,
-- sizeof(struct pingcli_data));
-- }
--
--
-- CDEBUG (D_OTHER, "ping client released resources\n");
--} /* pingcli_shutdown() */
--
- static int pingcli_callback(ptl_event_t *ev)
-static void pingcli_callback(ptl_event_t *ev)
--{
-- int i, magic;
- i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned));
- magic = *(int *)(ev->mem_desc.start + ev->offset);
- i = *(int *)(ev->md.start + ev->offset + sizeof(unsigned));
- magic = *(int *)(ev->md.start + ev->offset);
--
-- if(magic != 0xcafebabe) {
- printk ("LustreError: Unexpected response \n");
- return 1;
- CERROR("Unexpected response %x\n", magic);
-- }
--
-- if((i == count) || !count)
-- wake_up_process (client->tsk);
-- else
- printk ("LustreError: Received response after timeout for %d\n",i);
- return 1;
- CERROR("Received response after timeout for %d\n",i);
--}
--
--
--static struct pingcli_data *
--pingcli_start(struct portal_ioctl_data *args)
--{
- ptl_handle_ni_t *nip;
- ptl_handle_ni_t nih = PTL_INVALID_HANDLE;
-- unsigned ping_head_magic = PING_HEADER_MAGIC;
-- unsigned ping_bulk_magic = PING_BULK_MAGIC;
-- int rc;
-- struct timeval tv1, tv2;
-- char str[PTL_NALFMT_SIZE];
--
-- client->tsk = current;
-- client->args = args;
-- CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \
-- nal %d, size %u, count: %u, timeout: %u\n",
-- args->ioc_nid,
-- portals_nid2str(args->ioc_nal, args->ioc_nid, str),
-- args->ioc_nal, args->ioc_size,
-- args->ioc_count, args->ioc_timeout);
--
--
-- PORTAL_ALLOC (client->outbuf, STDSIZE + args->ioc_size) ;
-- if (client->outbuf == NULL)
-- {
-- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
- pingcli_shutdown (4);
- pingcli_shutdown (nih, 4);
-- return (NULL);
-- }
--
-- PORTAL_ALLOC (client->inbuf,
-- (args->ioc_size + STDSIZE) * args->ioc_count);
-- if (client->inbuf == NULL)
-- {
-- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
- pingcli_shutdown (4);
- pingcli_shutdown (nih, 4);
-- return (NULL);
-- }
--
-- /* Aquire and initialize the proper nal for portals. */
- if ((nip = kportal_get_ni (args->ioc_nal)) == NULL)
- rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih);
- if (rc != PTL_OK || rc != PTL_IFACE_DUP)
-- {
-- CERROR ("NAL %d not loaded\n", args->ioc_nal);
- pingcli_shutdown (4);
- pingcli_shutdown (nih, 4);
-- return (NULL);
-- }
--
-- /* Based on the initialization aquire our unique portal ID. */
- if ((rc = PtlGetId (*nip, &client->myid)))
- if ((rc = PtlGetId (nih, &client->myid)))
-- {
-- CERROR ("PtlGetId error %d\n", rc);
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
-- return (NULL);
-- }
--
-- /* Setup the local match entries */
-- client->id_local.nid = PTL_NID_ANY;
-- client->id_local.pid = PTL_PID_ANY;
--
-- /* Setup the remote match entries */
-- client->id_remote.nid = args->ioc_nid;
-- client->id_remote.pid = 0;
--
- if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT,
- if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT,
-- client->id_local, 0, ~0, PTL_RETAIN,
-- PTL_INS_AFTER, &client->me)))
-- {
-- CERROR ("PtlMEAttach error %d\n", rc);
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
-- return (NULL);
-- }
--
-- /* Allocate the event queue for this network interface */
- if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq)))
- if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq)))
-- {
-- CERROR ("PtlEQAlloc error %d\n", rc);
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
-- return (NULL);
-- }
--
-- count = args->ioc_count;
--
-- client->md_in_head.start = client->inbuf;
-- client->md_in_head.length = (args->ioc_size + STDSIZE)
-- * count;
-- client->md_in_head.threshold = PTL_MD_THRESH_INF;
- client->md_in_head.options = PTL_MD_OP_PUT;
- client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- client->md_in_head.user_ptr = NULL;
- client->md_in_head.eventq = client->eq;
- client->md_in_head.eq_handle = client->eq;
-- memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count);
--
-- /* Attach the incoming buffer */
-- if ((rc = PtlMDAttach (client->me, client->md_in_head,
-- PTL_UNLINK, &client->md_in_head_h))) {
-- CERROR ("PtlMDAttach error %d\n", rc);
- pingcli_shutdown (1);
- pingcli_shutdown (nih, 1);
-- return (NULL);
-- }
-- /* Setup the outgoing ping header */
-- client->md_out_head.start = client->outbuf;
-- client->md_out_head.length = STDSIZE + args->ioc_size;
-- client->md_out_head.threshold = args->ioc_count;
- client->md_out_head.options = PTL_MD_OP_PUT;
- client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- client->md_out_head.user_ptr = NULL;
- client->md_out_head.eventq = PTL_EQ_NONE;
- client->md_out_head.eq_handle = PTL_EQ_NONE;
--
-- memcpy (client->outbuf, &ping_head_magic, sizeof(ping_bulk_magic));
--
-- count = 0;
--
-- /* Bind the outgoing ping header */
- if ((rc=PtlMDBind (*nip, client->md_out_head,
- &client->md_out_head_h))) {
- if ((rc=PtlMDBind (nih, client->md_out_head,
- PTL_UNLINK, &client->md_out_head_h))) {
-- CERROR ("PtlMDBind error %d\n", rc);
- pingcli_shutdown (1);
- pingcli_shutdown (nih, 1);
-- return NULL;
-- }
-- while ((args->ioc_count - count)) {
-- memcpy (client->outbuf + sizeof(unsigned),
-- &(count), sizeof(unsigned));
-- /* Put the ping packet */
-- do_gettimeofday (&tv1);
--
-- memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1,
-- sizeof(struct timeval));
--
-- if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
-- client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) {
-- PDEBUG ("PtlPut (header)", rc);
- pingcli_shutdown (1);
- pingcli_shutdown (nih, 1);
-- return NULL;
-- }
- printk ("Lustre: sent msg no %d", count);
- CWARN ("Lustre: sent msg no %d", count);
--
-- set_current_state (TASK_INTERRUPTIBLE);
-- rc = schedule_timeout (20 * args->ioc_timeout);
-- if (rc == 0) {
- printk ("LustreError: :: timeout .....\n");
- CERROR ("timeout .....\n");
-- } else {
-- do_gettimeofday (&tv2);
- printk("Lustre: :: Reply in %u usec\n",
- (unsigned)((tv2.tv_sec - tv1.tv_sec)
- * 1000000 + (tv2.tv_usec - tv1.tv_usec)));
- CWARN("Reply in %u usec\n",
- (unsigned)((tv2.tv_sec - tv1.tv_sec)
- * 1000000 + (tv2.tv_usec - tv1.tv_usec)));
-- }
-- count++;
-- }
--
-- if (client->outbuf != NULL)
-- PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size);
--
-- if (client->inbuf != NULL)
-- PORTAL_FREE (client->inbuf,
-- (args->ioc_size + STDSIZE) * args->ioc_count);
--
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
--
-- /* Success! */
-- return NULL;
--} /* pingcli_setup() */
--
--
--
--/* called by the portals_ioctl for ping requests */
--int kping_client(struct portal_ioctl_data *args)
--{
-- PORTAL_ALLOC (client, sizeof(struct pingcli_data));
-- if (client == NULL)
-- {
-- CERROR ("Unable to allocate client structure\n");
-- return (0);
-- }
-- memset (client, 0, sizeof(struct pingcli_data));
-- pingcli_start (args);
--
-- return 0;
--} /* kping_client() */
--
--
--static int __init pingcli_init(void)
--{
-- PORTAL_SYMBOL_REGISTER(kping_client);
-- return 0;
--} /* pingcli_init() */
--
--
--static void /*__exit*/ pingcli_cleanup(void)
--{
-- PORTAL_SYMBOL_UNREGISTER (kping_client);
--} /* pingcli_cleanup() */
--
--
--MODULE_AUTHOR("Brian Behlendorf (LLNL)");
--MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
--MODULE_LICENSE("GPL");
--
--module_init(pingcli_init);
--module_exit(pingcli_cleanup);
--
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
--EXPORT_SYMBOL (kping_client);
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
-- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
-- * Amey Inamdar <amey@calsoftinc.com>
-- * Kedar Sovani <kedar@calsoftinc.com>
-- *
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#define DEBUG_SUBSYSTEM S_PINGER
--
--#include <linux/kp30.h>
--#include <portals/p30.h>
--#include "ping.h"
--
--#include <linux/module.h>
--#include <linux/proc_fs.h>
--#include <linux/init.h>
--#include <linux/kernel.h>
--#include <linux/sched.h>
--#include <linux/version.h>
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
--#include <linux/workqueue.h>
--#else
--#include <linux/tqueue.h>
--#endif
--#include <linux/wait.h>
--#include <linux/smp_lock.h>
--
--#include <asm/unistd.h>
--#include <asm/semaphore.h>
--
--#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
--#define MAXSIZE (16*1024)
--
--static unsigned ping_head_magic;
--static unsigned ping_bulk_magic;
--static int nal = SOCKNAL; // Your NAL,
--static unsigned long packets_valid = 0; // Valid packets
--static int running = 1;
--atomic_t pkt;
--
--static struct pingsrv_data *server=NULL; // Our ping server
--
--static void *pingsrv_shutdown(int err)
--{
-- int rc;
--
-- /* Yes, we are intentionally allowing us to fall through each
-- * case in to the next. This allows us to pass an error
-- * code to just clean up the right stuff.
-- */
-- switch (err) {
-- case 1:
-- /* Unlink any memory descriptors we may have used */
-- if ((rc = PtlMDUnlink (server->mdin_h)))
-- PDEBUG ("PtlMDUnlink (out head buffer)", rc);
-- case 2:
-- /* Free the event queue */
-- if ((rc = PtlEQFree (server->eq)))
-- PDEBUG ("PtlEQFree", rc);
--
-- /* Unlink the client portal from the ME list */
-- if ((rc = PtlMEUnlink (server->me)))
-- PDEBUG ("PtlMEUnlink", rc);
--
-- case 3:
- kportal_put_ni (nal);
- PtlNIFini (server->ni);
--
-- case 4:
--
-- case 5:
-- if (server->in_buf != NULL)
-- PORTAL_FREE (server->in_buf, MAXSIZE);
--
-- if (server != NULL)
-- PORTAL_FREE (server,
-- sizeof (struct pingsrv_data));
--
-- }
--
-- CDEBUG (D_OTHER, "ping sever resources released\n");
-- return NULL;
--} /* pingsrv_shutdown() */
--
--
--int pingsrv_thread(void *arg)
--{
-- int rc;
-- unsigned long magic;
-- unsigned long ping_bulk_magic = 0xcafebabe;
--
-- kportal_daemonize ("pingsrv");
-- server->tsk = current;
--
-- while (running) {
-- set_current_state (TASK_INTERRUPTIBLE);
-- if (atomic_read (&pkt) == 0) {
-- schedule_timeout (MAX_SCHEDULE_TIMEOUT);
-- continue;
-- }
--
- magic = *((int *)(server->evnt.mem_desc.start
- magic = *((int *)(server->evnt.md.start
-- + server->evnt.offset));
--
--
-- if(magic != 0xdeadbeef) {
- printk("LustreError: Unexpected Packet to the server\n");
- CERROR("Unexpected Packet to the server\n");
--
-- }
-- memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic));
--
-- server->mdout.length = server->evnt.rlength;
-- server->mdout.start = server->in_buf;
-- server->mdout.threshold = 1;
- server->mdout.options = PTL_MD_OP_PUT;
- server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- server->mdout.user_ptr = NULL;
- server->mdout.eventq = PTL_EQ_NONE;
- server->mdout.eq_handle = PTL_EQ_NONE;
--
-- /* Bind the outgoing buffer */
-- if ((rc = PtlMDBind (server->ni, server->mdout,
- &server->mdout_h))) {
- PTL_UNLINK, &server->mdout_h))) {
-- PDEBUG ("PtlMDBind", rc);
-- pingsrv_shutdown (1);
-- return 1;
-- }
--
--
-- server->mdin.start = server->in_buf;
-- server->mdin.length = MAXSIZE;
-- server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
- server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- server->mdin.user_ptr = NULL;
- server->mdin.eventq = server->eq;
- server->mdin.eq_handle = server->eq;
--
-- if ((rc = PtlMDAttach (server->me, server->mdin,
-- PTL_UNLINK, &server->mdin_h))) {
-- PDEBUG ("PtlMDAttach (bulk)", rc);
-- CDEBUG (D_OTHER, "ping server resources allocated\n");
-- }
--
-- if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
-- server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0)))
-- PDEBUG ("PtlPut", rc);
--
-- atomic_dec (&pkt);
--
-- }
-- pingsrv_shutdown (1);
-- running = 1;
-- return 0;
--}
--
- static int pingsrv_packet(ptl_event_t *ev)
-static void pingsrv_packet(ptl_event_t *ev)
--{
-- atomic_inc (&pkt);
-- wake_up_process (server->tsk);
- return 1;
--} /* pingsrv_head() */
--
- static int pingsrv_callback(ptl_event_t *ev)
-static void pingsrv_callback(ptl_event_t *ev)
--{
--
-- if (ev == NULL) {
-- CERROR ("null in callback, ev=%p\n", ev);
- return 0;
- return;
-- }
-- server->evnt = *ev;
--
- printk ("Lustre: received ping from nid "LPX64" "
- CWARN ("received ping from nid "LPX64" "
-- "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n",
-- ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
- *((int *)(ev->mem_desc.start + ev->offset)),
- *((int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned))),
- *((int *)(ev->mem_desc.start + ev->offset + 2 *
- *((int *)(ev->md.start + ev->offset)),
- *((int *)(ev->md.start + ev->offset + sizeof(unsigned))),
- *((int *)(ev->md.start + ev->offset + 2 *
-- sizeof(unsigned))));
--
-- packets_valid++;
--
- return pingsrv_packet(ev);
- pingsrv_packet(ev);
--
--} /* pingsrv_callback() */
--
--
--static struct pingsrv_data *pingsrv_setup(void)
--{
- ptl_handle_ni_t *nip;
-- int rc;
-
- server->ni = PTL_INVALID_HANDLE;
--
-- /* Aquire and initialize the proper nal for portals. */
- if ((nip = kportal_get_ni (nal)) == NULL) {
- rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni);
- if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) {
-- CDEBUG (D_OTHER, "NAL %d not loaded\n", nal);
-- return pingsrv_shutdown (4);
-- }
--
- server->ni= *nip;
--
-- /* Based on the initialization aquire our unique portal ID. */
-- if ((rc = PtlGetId (server->ni, &server->my_id))) {
-- PDEBUG ("PtlGetId", rc);
-- return pingsrv_shutdown (2);
-- }
--
-- server->id_local.nid = PTL_NID_ANY;
-- server->id_local.pid = PTL_PID_ANY;
--
-- /* Attach a match entries for header packets */
-- if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
-- server->id_local,0, ~0,
-- PTL_RETAIN, PTL_INS_AFTER, &server->me))) {
-- PDEBUG ("PtlMEAttach", rc);
-- return pingsrv_shutdown (2);
-- }
--
--
- if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback,
- if ((rc = PtlEQAlloc (server->ni, 1024, &pingsrv_callback,
-- &server->eq))) {
-- PDEBUG ("PtlEQAlloc (callback)", rc);
-- return pingsrv_shutdown (2);
-- }
--
-- PORTAL_ALLOC (server->in_buf, MAXSIZE);
-- if(!server->in_buf){
-- CDEBUG (D_OTHER,"Allocation error\n");
-- return pingsrv_shutdown(2);
-- }
--
-- /* Setup the incoming buffer */
-- server->mdin.start = server->in_buf;
-- server->mdin.length = MAXSIZE;
-- server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
- server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- server->mdin.user_ptr = NULL;
- server->mdin.eventq = server->eq;
- server->mdin.eq_handle = server->eq;
-- memset (server->in_buf, 0, STDSIZE);
--
-- if ((rc = PtlMDAttach (server->me, server->mdin,
-- PTL_UNLINK, &server->mdin_h))) {
-- PDEBUG ("PtlMDAttach (bulk)", rc);
-- CDEBUG (D_OTHER, "ping server resources allocated\n");
-- }
--
-- /* Success! */
-- return server;
--} /* pingsrv_setup() */
--
--static int pingsrv_start(void)
--{
-- /* Setup our server */
-- if (!pingsrv_setup()) {
-- CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
-- return -ENOMEM;
-- }
-- kernel_thread (pingsrv_thread,NULL,0);
-- return 0;
--} /* pingsrv_start() */
--
--
--
--static int __init pingsrv_init(void)
--{
-- ping_head_magic = PING_HEADER_MAGIC;
-- ping_bulk_magic = PING_BULK_MAGIC;
-- PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
-- return pingsrv_start ();
--} /* pingsrv_init() */
--
--
--static void /*__exit*/ pingsrv_cleanup(void)
--{
-- remove_proc_entry ("net/pingsrv", NULL);
--
-- running = 0;
-- wake_up_process (server->tsk);
-- while (running != 1) {
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (HZ);
-- }
--
--} /* pingsrv_cleanup() */
--
--
--MODULE_PARM(nal, "i");
--MODULE_PARM_DESC(nal, "Use the specified NAL "
- "(6-kscimacnal, 2-ksocknal, 1-kqswnal)");
- "(2-ksocknal, 1-kqswnal)");
--
--MODULE_AUTHOR("Brian Behlendorf (LLNL)");
--MODULE_DESCRIPTION("A kernel space ping server for portals testing");
--MODULE_LICENSE("GPL");
--
--module_init(pingsrv_init);
--module_exit(pingsrv_cleanup);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
-- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
-- * Kedar Sovani (kedar@calsoftinc.com)
-- * Amey Inamdar (amey@calsoftinc.com)
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--/* This is a striped down version of pinger. It follows a single
-- * request-response protocol. Doesn't do Bulk data pinging. Also doesn't
-- * send multiple packets in a single ioctl.
-- */
--
--
--#define DEBUG_SUBSYSTEM S_PINGER
--
--#include <linux/kp30.h>
--#include <portals/p30.h>
--#include <linux/module.h>
--#include <linux/proc_fs.h>
--#include <linux/init.h>
--#include <linux/poll.h>
--#include "ping.h"
--/* int portal_debug = D_PING_CLI; */
--
--
--#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes
-- assumed */
--
--/* This should be enclosed in a structure */
--
--static struct pingcli_data *client = NULL;
--
--static int count = 0;
--
--static void
- pingcli_shutdown(int err)
-pingcli_shutdown(ptl_handle_ni_t nih, int err)
--{
-- int rc;
--
-- /* Yes, we are intentionally allowing us to fall through each
-- * case in to the next. This allows us to pass an error
-- * code to just clean up the right stuff.
-- */
-- switch (err) {
-- case 1:
-- /* Unlink any memory descriptors we may have used */
-- if ((rc = PtlMDUnlink (client->md_out_head_h)))
-- PDEBUG ("PtlMDUnlink", rc);
-- case 2:
-- /* Free the event queue */
-- if ((rc = PtlEQFree (client->eq)))
-- PDEBUG ("PtlEQFree", rc);
--
-- if ((rc = PtlMEUnlink (client->me)))
-- PDEBUG ("PtlMEUnlink", rc);
-- case 3:
- kportal_put_ni (client->args->ioc_nal);
- PtlNIFini (nih);
--
-- case 4:
-- /* Free our buffers */
-- if (client->outbuf != NULL)
-- PORTAL_FREE (client->outbuf, STDSIZE);
--
-- if (client->inbuf != NULL)
-- PORTAL_FREE (client->inbuf, STDSIZE);
--
--
-- if (client != NULL)
-- PORTAL_FREE (client,
-- sizeof(struct pingcli_data));
-- }
--
--
-- CDEBUG (D_OTHER, "ping client released resources\n");
--} /* pingcli_shutdown() */
--
- static int pingcli_callback(ptl_event_t *ev)
-static void pingcli_callback(ptl_event_t *ev)
--{
- wake_up_process (client->tsk);
- return 1;
- wake_up_process (client->tsk);
--}
--
--
--static struct pingcli_data *
--pingcli_start(struct portal_ioctl_data *args)
--{
- const ptl_handle_ni_t *nip;
- ptl_handle_ni_t nih = PTL_INVALID_HANDLE;
-- unsigned ping_head_magic = PING_HEADER_MAGIC;
-- char str[PTL_NALFMT_SIZE];
-- int rc;
--
-- client->tsk = current;
-- client->args = args;
--
-- CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \
-- nal %d, size %u, count: %u, timeout: %u\n",
-- args->ioc_nid,
-- portals_nid2str(args->ioc_nid, args->ioc_nal, str),
-- args->ioc_nal, args->ioc_size,
-- args->ioc_count, args->ioc_timeout);
--
--
-- PORTAL_ALLOC (client->outbuf, STDSIZE) ;
-- if (client->outbuf == NULL)
-- {
-- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
- pingcli_shutdown (4);
- pingcli_shutdown (nih, 4);
-- return (NULL);
-- }
--
-- PORTAL_ALLOC (client->inbuf, STDSIZE);
--
-- if (client->inbuf == NULL)
-- {
-- CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
- pingcli_shutdown (4);
- pingcli_shutdown (nih, 4);
-- return (NULL);
-- }
--
-- /* Aquire and initialize the proper nal for portals. */
- if ((nip = kportal_get_ni (args->ioc_nal)) == NULL)
- rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP)
-- {
-- CERROR ("NAL %d not loaded.\n", args->ioc_nal);
- pingcli_shutdown (4);
- pingcli_shutdown (nih, 4);
-- return (NULL);
-- }
--
-- /* Based on the initialization aquire our unique portal ID. */
- if ((rc = PtlGetId (*nip, &client->myid)))
- if ((rc = PtlGetId (nih, &client->myid)))
-- {
-- CERROR ("PtlGetId error %d\n", rc);
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
-- return (NULL);
-- }
--
-- /* Setup the local match entries */
-- client->id_local.nid = PTL_NID_ANY;
-- client->id_local.pid = PTL_PID_ANY;
--
-- /* Setup the remote match entries */
-- client->id_remote.nid = args->ioc_nid;
-- client->id_remote.pid = 0;
--
- if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT,
- if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT,
-- client->id_local, 0, ~0, PTL_RETAIN,
-- PTL_INS_AFTER, &client->me)))
-- {
-- CERROR ("PtlMEAttach error %d\n", rc);
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
-- return (NULL);
-- }
--
-- /* Allocate the event queue for this network interface */
- if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq)))
- if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq)))
-- {
-- CERROR ("PtlEQAlloc error %d\n", rc);
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
-- return (NULL);
-- }
--
--
-- client->md_in_head.start = client->inbuf;
-- client->md_in_head.length = STDSIZE;
-- client->md_in_head.threshold = 1;
- client->md_in_head.options = PTL_MD_OP_PUT;
- client->md_in_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- client->md_in_head.user_ptr = NULL;
- client->md_in_head.eventq = client->eq;
- client->md_in_head.eq_handle = client->eq;
-- memset (client->inbuf, 0, STDSIZE);
--
-- /* Attach the incoming buffer */
-- if ((rc = PtlMDAttach (client->me, client->md_in_head,
-- PTL_UNLINK, &client->md_in_head_h))) {
-- CERROR ("PtlMDAttach error %d\n", rc);
- pingcli_shutdown (1);
- pingcli_shutdown (nih, 1);
-- return (NULL);
-- }
--
-- /* Setup the outgoing ping header */
-- client->md_out_head.start = client->outbuf;
-- client->md_out_head.length = STDSIZE;
-- client->md_out_head.threshold = 1;
- client->md_out_head.options = PTL_MD_OP_PUT;
- client->md_out_head.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- client->md_out_head.user_ptr = NULL;
- client->md_out_head.eventq = PTL_EQ_NONE;
- client->md_out_head.eq_handle = PTL_EQ_NONE;
--
-- memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
--
-- /* Bind the outgoing ping header */
- if ((rc=PtlMDBind (*nip, client->md_out_head,
- &client->md_out_head_h))) {
- if ((rc=PtlMDBind (nih, client->md_out_head,
- PTL_UNLINK, &client->md_out_head_h))) {
-- CERROR ("PtlMDBind error %d\n", rc);
- pingcli_shutdown (1);
- pingcli_shutdown (nih, 1);
-- return (NULL);
-- }
-- /* Put the ping packet */
-- if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
-- client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) {
-- PDEBUG ("PtlPut (header)", rc);
- pingcli_shutdown (1);
- pingcli_shutdown (nih, 1);
-- return NULL;
-- }
--
-- count = 0;
-- set_current_state (TASK_INTERRUPTIBLE);
-- rc = schedule_timeout (20 * args->ioc_timeout);
-- if (rc == 0) {
- printk ("LustreError: Time out on the server\n");
- pingcli_shutdown (2);
- CERROR ("Time out on the server\n");
- pingcli_shutdown (nih, 2);
-- return NULL;
- } else
- printk("Lustre: Received respose from the server \n");
-
- } else {
- CWARN("Received respose from the server \n");
- }
--
- pingcli_shutdown (2);
- pingcli_shutdown (nih, 2);
--
-- /* Success! */
-- return NULL;
--} /* pingcli_setup() */
--
--
--
--/* called by the portals_ioctl for ping requests */
--int kping_client(struct portal_ioctl_data *args)
--{
--
-- PORTAL_ALLOC (client, sizeof(struct pingcli_data));
-- memset (client, 0, sizeof(struct pingcli_data));
-- if (client == NULL)
-- {
-- CERROR ("Unable to allocate client structure\n");
-- return (0);
-- }
-- pingcli_start (args);
--
-- return 0;
--} /* kping_client() */
--
--
--static int __init pingcli_init(void)
--{
-- PORTAL_SYMBOL_REGISTER(kping_client);
-- return 0;
--} /* pingcli_init() */
--
--
--static void /*__exit*/ pingcli_cleanup(void)
--{
-- PORTAL_SYMBOL_UNREGISTER (kping_client);
--} /* pingcli_cleanup() */
--
--
--MODULE_AUTHOR("Brian Behlendorf (LLNL)");
--MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
--MODULE_LICENSE("GPL");
--
--module_init(pingcli_init);
--module_exit(pingcli_cleanup);
--
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
--EXPORT_SYMBOL (kping_client);
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
-- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
-- * Amey Inamdar <amey@calsoftinc.com>
-- * Kedar Sovani <kedar@calsoftinc.com>
-- *
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* This is a striped down version of pinger. It follows a single
-- * request-response protocol. Doesn't do Bulk data pinging. Also doesn't
-- * send multiple packets in a single ioctl.
-- */
--
--#define DEBUG_SUBSYSTEM S_PINGER
--
--#include <linux/kp30.h>
--#include <portals/p30.h>
--#include "ping.h"
--
--#include <linux/module.h>
--#include <linux/proc_fs.h>
--#include <linux/init.h>
--#include <linux/kernel.h>
--#include <linux/sched.h>
--#include <linux/version.h>
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
--#include <linux/workqueue.h>
--#else
--#include <linux/tqueue.h>
--#endif
--#include <linux/wait.h>
--#include <linux/smp_lock.h>
--
--#include <asm/unistd.h>
--#include <asm/semaphore.h>
--
--#define STDSIZE (sizeof(int) + sizeof(int) + 4)
--
- static int nal = 0; // Your NAL,
-static int nal = PTL_IFACE_DEFAULT; // Your NAL,
--static unsigned long packets_valid = 0; // Valid packets
--static int running = 1;
--atomic_t pkt;
--
--static struct pingsrv_data *server=NULL; // Our ping server
--
--#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
--#endif
--
--static void *pingsrv_shutdown(int err)
--{
-- int rc;
--
-- /* Yes, we are intentionally allowing us to fall through each
-- * case in to the next. This allows us to pass an error
-- * code to just clean up the right stuff.
-- */
-- switch (err) {
-- case 1:
-- /* Unlink any memory descriptors we may have used */
-- if ((rc = PtlMDUnlink (server->mdin_h)))
-- PDEBUG ("PtlMDUnlink (out head buffer)", rc);
-- case 2:
-- /* Free the event queue */
-- if ((rc = PtlEQFree (server->eq)))
-- PDEBUG ("PtlEQFree", rc);
--
-- /* Unlink the client portal from the ME list */
-- if ((rc = PtlMEUnlink (server->me)))
-- PDEBUG ("PtlMEUnlink", rc);
--
-- case 3:
- kportal_put_ni (nal);
- PtlNIFini(server->ni);
--
-- case 4:
--
-- if (server->in_buf != NULL)
-- PORTAL_FREE (server->in_buf, STDSIZE);
--
-- if (server != NULL)
-- PORTAL_FREE (server,
-- sizeof (struct pingsrv_data));
--
-- }
--
-- CDEBUG (D_OTHER, "ping sever resources released\n");
-- return NULL;
--} /* pingsrv_shutdown() */
--
--
--int pingsrv_thread(void *arg)
--{
-- int rc;
--
-- kportal_daemonize ("pingsrv");
-- server->tsk = current;
--
-- while (running) {
-- set_current_state (TASK_INTERRUPTIBLE);
-- if (atomic_read (&pkt) == 0) {
-- schedule_timeout (MAX_SCHEDULE_TIMEOUT);
-- continue;
-- }
--
-- server->mdout.start = server->in_buf;
-- server->mdout.length = STDSIZE;
-- server->mdout.threshold = 1;
- server->mdout.options = PTL_MD_OP_PUT;
- server->mdout.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- server->mdout.user_ptr = NULL;
- server->mdout.eventq = PTL_EQ_NONE;
- server->mdout.eq_handle = PTL_EQ_NONE;
--
-- /* Bind the outgoing buffer */
-- if ((rc = PtlMDBind (server->ni, server->mdout,
- &server->mdout_h))) {
- PTL_UNLINK, &server->mdout_h))) {
-- PDEBUG ("PtlMDBind", rc);
-- pingsrv_shutdown (1);
-- return 1;
-- }
--
--
-- server->mdin.start = server->in_buf;
-- server->mdin.length = STDSIZE;
-- server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
- server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- server->mdin.user_ptr = NULL;
- server->mdin.eventq = server->eq;
- server->mdin.eq_handle = server->eq;
--
-- if ((rc = PtlMDAttach (server->me, server->mdin,
-- PTL_UNLINK, &server->mdin_h))) {
-- PDEBUG ("PtlMDAttach (bulk)", rc);
-- CDEBUG (D_OTHER, "ping server resources allocated\n");
-- }
--
-- if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
-- server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0)))
-- PDEBUG ("PtlPut", rc);
--
-- atomic_dec (&pkt);
--
-- }
-- pingsrv_shutdown (1);
-- running = 1;
-- return 0;
--}
--
- static int pingsrv_packet(ptl_event_t *ev)
-static void pingsrv_packet(ptl_event_t *ev)
--{
-- atomic_inc (&pkt);
-- wake_up_process (server->tsk);
- return 1;
--} /* pingsrv_head() */
--
- static int pingsrv_callback(ptl_event_t *ev)
-static void pingsrv_callback(ptl_event_t *ev)
--{
--
-- if (ev == NULL) {
-- CERROR ("null in callback, ev=%p\n", ev);
- return 0;
- return;
-- }
-- server->evnt = *ev;
--
- printk ("Lustre: received ping from nid "LPX64" "
- "(off=%u rlen=%u mlen=%u head=%x)\n",
- ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
- *((int *)(ev->mem_desc.start + ev->offset)));
- CWARN("Lustre: received ping from nid "LPX64" "
- "(off=%u rlen=%u mlen=%u head=%x)\n",
- ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
- *((int *)(ev->md.start + ev->offset)));
--
-- packets_valid++;
--
- return pingsrv_packet(ev);
- pingsrv_packet(ev);
--
--} /* pingsrv_callback() */
--
--
--static struct pingsrv_data *pingsrv_setup(void)
--{
- ptl_handle_ni_t *nip;
-- int rc;
--
-- /* Aquire and initialize the proper nal for portals. */
- if ((nip = kportal_get_ni (nal)) == NULL) {
- server->ni = PTL_INVALID_HANDLE;
-
- rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni);
- if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-- CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal);
-- return pingsrv_shutdown (4);
-- }
-
- server->ni= *nip;
--
-- /* Based on the initialization aquire our unique portal ID. */
-- if ((rc = PtlGetId (server->ni, &server->my_id))) {
-- PDEBUG ("PtlGetId", rc);
-- return pingsrv_shutdown (2);
-- }
--
-- server->id_local.nid = PTL_NID_ANY;
-- server->id_local.pid = PTL_PID_ANY;
--
-- /* Attach a match entries for header packets */
-- if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
-- server->id_local,0, ~0,
-- PTL_RETAIN, PTL_INS_AFTER, &server->me))) {
-- PDEBUG ("PtlMEAttach", rc);
-- return pingsrv_shutdown (2);
-- }
--
--
-- if ((rc = PtlEQAlloc (server->ni, 64, pingsrv_callback,
-- &server->eq))) {
-- PDEBUG ("PtlEQAlloc (callback)", rc);
-- return pingsrv_shutdown (2);
-- }
--
-- PORTAL_ALLOC (server->in_buf, STDSIZE);
-- if(!server->in_buf){
-- CDEBUG (D_OTHER,"Allocation error\n");
-- return pingsrv_shutdown(2);
-- }
--
-- /* Setup the incoming buffer */
-- server->mdin.start = server->in_buf;
-- server->mdin.length = STDSIZE;
-- server->mdin.threshold = 1;
- server->mdin.options = PTL_MD_OP_PUT;
- server->mdin.options = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-- server->mdin.user_ptr = NULL;
- server->mdin.eventq = server->eq;
- server->mdin.eq_handle = server->eq;
-- memset (server->in_buf, 0, STDSIZE);
--
-- if ((rc = PtlMDAttach (server->me, server->mdin,
-- PTL_UNLINK, &server->mdin_h))) {
-- PDEBUG ("PtlMDAttach (bulk)", rc);
-- CDEBUG (D_OTHER, "ping server resources allocated\n");
-- }
--
-- /* Success! */
-- return server;
--} /* pingsrv_setup() */
--
--static int pingsrv_start(void)
--{
-- /* Setup our server */
-- if (!pingsrv_setup()) {
-- CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
-- return -ENOMEM;
-- }
-- kernel_thread (pingsrv_thread,NULL,0);
-- return 0;
--} /* pingsrv_start() */
--
--
--
--static int __init pingsrv_init(void)
--{
-- PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
-- return pingsrv_start ();
--} /* pingsrv_init() */
--
--
--static void /*__exit*/ pingsrv_cleanup(void)
--{
-- remove_proc_entry ("net/pingsrv", NULL);
--
-- running = 0;
-- wake_up_process (server->tsk);
-- while (running != 1) {
-- set_current_state (TASK_UNINTERRUPTIBLE);
-- schedule_timeout (HZ);
-- }
--
--} /* pingsrv_cleanup() */
--
--
--MODULE_PARM(nal, "i");
--MODULE_PARM_DESC(nal, "Use the specified NAL "
- "(6-kscimacnal, 2-ksocknal, 1-kqswnal)");
- "(2-ksocknal, 1-kqswnal)");
--
--MODULE_AUTHOR("Brian Behlendorf (LLNL)");
--MODULE_DESCRIPTION("A kernel space ping server for portals testing");
--MODULE_LICENSE("GPL");
--
--module_init(pingsrv_init);
--module_exit(pingsrv_cleanup);
+++ /dev/null
--#!/bin/sh
--
--SIMPLE=${SIMPLE:-0}
--
--if [ $SIMPLE -eq 0 ]; then
-- PING=pingcli.o
--else
-- PING=spingcli.o
--fi
--
--case "$1" in
-- tcp)
-- /sbin/insmod ../oslib/portals.o
-- /sbin/insmod ../socknal/ksocknal.o
-- /sbin/insmod ./$PING
-- echo ksocknal > /tmp/nal
-- ;;
--
-- elan)
-- /sbin/insmod ../oslib/portals.o
-- /sbin/insmod ../qswnal/kqswnal.o
-- /sbin/insmod ./$PING
-- echo kqswnal > /tmp/nal
-- ;;
--
-- gm)
-- /sbin/insmod portals
-- /sbin/insmod kgmnal
-- /sbin/insmod ./$PING
-- echo kgmnal > /tmp/nal
-- ;;
--
-- *)
-- echo "Usage : ${0} < tcp | elan | gm>"
-- exit 1;
--esac
--exit 0;
+++ /dev/null
--#!/bin/sh
--
--SIMPLE=${SIMPLE:-0}
--
--if [ $SIMPLE -eq 0 ]; then
-- PING=pingsrv.o
--else
-- PING=spingsrv.o
--fi
--
--case "$1" in
-- tcp)
-- /sbin/insmod ../oslib/portals.o
-- /sbin/insmod ../socknal/ksocknal.o
-- /sbin/insmod ./$PING nal=2
-- echo ksocknal > /tmp/nal
-- ;;
--
-- elan)
-- /sbin/insmod ../oslib/portals.o
-- /sbin/insmod ../qswnal/kqswnal.o
-- /sbin/insmod ./$PING nal=4
-- echo kqswnal > /tmp/nal
-- ;;
--
-- gm)
-- /sbin/insmod portals
-- /sbin/insmod kgmnal
-- /sbin/insmod ./$PING nal=3
-- echo kgmnal > /tmp/nal
-- ;;
--
-- *)
-- echo "Usage : ${0} < tcp | elan | gm>"
-- exit 1;
--esac
--../utils/acceptor 9999&
--exit 0;
+++ /dev/null
--#!/bin/sh
--
--SIMPLE=${SIMPLE:-1}
--
--if [ $SIMPLE -eq 0 ]; then
-- PING=spingcli
--else
-- PING=pingcli
--fi
--
--rmmod $PING
--NAL=`cat /tmp/nal`;
--rmmod $NAL
--rmmod portals
+++ /dev/null
--#!/bin/sh
--
--SIMPLE=${SIMPLE:-1}
--
--if [ $SIMPLE -eq 0 ]; then
-- PING=spingsrv
--else
-- PING=pingsrv
--fi
--
--rmmod $PING
--NAL=`cat /tmp/nal`;
--rmmod $NAL
--killall -9 acceptor
--rm -f /var/run/acceptor-9999.pid
--rmmod portals
+++ /dev/null
--if LIBLUSTRE
-if !CRAY_PORTALS
--noinst_LIBRARIES = libtcpnal.a
--endif
-
- noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h \
- ipmap.h bridge.h procbridge.h
-
- libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \
- dispatch.h table.h timer.h address.c procapi.c proclib.c \
- connection.c tcpnal.c connection.h
-endif
--
-noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
--libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
--libtcpnal_a_CFLAGS = $(LLCFLAGS)
+++ /dev/null
--This library implements two NAL interfaces, both running over IP.
--The first, tcpnal, creates TCP connections between participating
--processes in order to transport the portals requests. The second,
--ernal, provides a simple transport protocol which runs over
--UDP datagrams.
--
--The interface functions return both of these values in host order for
--convenience and readability. However this means that addresses
--exchanged in messages between hosts of different orderings will not
--function properly.
--
--Both NALs use the same support functions in order to schedule events
--and communicate with the generic portals implementation.
--
-- -------------------------
-- | api |
-- |_______________________|
-- | lib |
-- |_______________________|
-- | ernal | |tcpnal |
-- |--------| |----------|
-- | udpsock| |connection|
-- |-----------------------|
-- | timer/select |
-- -------------------------
--
--
-- These NALs uses the framework from fdnal of a pipe between the api
--and library sides. This is wrapped up in the select on the library
--side, and blocks on the api side. Performance could be severely
--enhanced by collapsing this aritificial barrier, by using shared
--memory queues, or by wiring the api layer directly to the library.
--
--
--nid is defined as the low order 24-bits of the IP address of the
--physical node left shifted by 8 plus a virtual node number of 0
--through 255 (really only 239). The virtual node number of a tcpnal
--application should be specified using the environment variable
--PTL_VIRTNODE. pid is now a completely arbitrary number in the
--range of 0 to 255. The IP interface used can be overridden by
--specifying the appropriate hostid by setting the PTL_HOSTID
--environment variable. The value can be either dotted decimal
--(n.n.n.n) or hex starting with "0x".
--TCPNAL:
-- As the NAL needs to try to send to a particular nid/pid pair, it
-- will open up connections on demand. Because the port associated with
-- the connecting socket is different from the bound port, two
-- connections will normally be established between a pair of peers, with
-- data flowing from the anonymous connect (active) port to the advertised
-- or well-known bound (passive) port of each peer.
--
-- Should the connection fail to open, an error is reported to the
-- library component, which causes the api request to fail.
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* address.c:
-- * this file provides functions to aquire the IP address of the node
-- * and translate them into a NID/PID pair which supports a static
-- * mapping of virtual nodes into the port range of an IP socket.
--*/
--
--#include <stdlib.h>
--#include <netdb.h>
--#include <unistd.h>
--#include <stdio.h>
--#include <portals/p30.h>
--#include <bridge.h>
--#include <ipmap.h>
--
--
--/* Function: get_node_id
-- * Returns: a 32 bit id for this node, actually a big-endian IP address
-- *
-- * get_node_id() determines the host name and uses the resolver to
-- * find out its ip address. This is fairly fragile and inflexible, but
-- * explicitly asking about interfaces and their addresses is very
-- * complicated and nonportable.
-- */
--static unsigned int get_node_id(void)
--{
-- char buffer[255];
-- unsigned int x;
-- struct hostent *he;
-- char * host_envp;
--
-- if (!(host_envp = getenv("PTL_HOSTID")))
-- {
-- gethostname(buffer,sizeof(buffer));
-- he=gethostbyname(buffer);
-- if (he)
-- x=*(unsigned int *)he->h_addr_list[0];
-- else
-- x = 0;
-- return(ntohl(x));
-- }
-- else
-- {
-- if (host_envp[1] != 'x')
-- {
-- int a, b, c, d;
-- sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d);
-- return ((a<<24) | (b<<16) | (c<<8) | d);
-- }
-- else
-- {
-- long long hostid = strtoll(host_envp, 0, 0);
-- return((unsigned int) hostid);
-- }
-- }
--}
--
--
--/* Function: set_address
-- * Arugments: t: a procnal structure to populate with the request
-- *
-- * set_address performs the bit manipulations to set the nid, pid, and
-- * iptop8 fields of the procnal structures.
-- *
-- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
-- */
--
--#ifdef DIRECT_IP_MODE
--void set_address(bridge t,ptl_pid_t pidrequest)
--{
-- int port;
-- if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
-- else port=pidrequest;
- t->nal_cb->ni.nid=get_node_id();
- t->nal_cb->ni.pid=port;
- t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
- t->lib_nal->libnal_ni.ni_pid.pid=port;
--}
--#else
--
--void set_address(bridge t,ptl_pid_t pidrequest)
--{
-- int virtnode, in_addr, port;
-- ptl_pid_t pid;
--
-- /* get and remember my node id*/
-- if (!getenv("PTL_VIRTNODE"))
-- virtnode = 0;
-- else
-- {
-- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
-- >> PNAL_VNODE_SHIFT);
-- virtnode = atoi(getenv("PTL_VIRTNODE"));
-- if (virtnode > maxvnode)
-- {
-- fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
-- virtnode, maxvnode);
-- return;
-- }
-- }
--
-- in_addr = get_node_id();
--
-- t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-
- t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-- pid=pidrequest;
-- /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
--#ifdef notyet
-- if (pid==(unsigned short)PTL_PID_ANY) port = 0;
--#endif
-- if (pid==(unsigned short)PTL_PID_ANY)
-- {
-- fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
-- return;
-- }
-- else if (pid > PNAL_PID_MASK)
-- {
-- fprintf(stderr, "portal pid of %d is too large - max %d\n",
-- pid, PNAL_PID_MASK);
-- return;
-- }
-- else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- t->nal_cb->ni.pid=pid;
- t->lib_nal->libnal_ni.ni_pid.pid=pid;
--}
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#ifndef TCPNAL_PROCBRIDGE_H
--#define TCPNAL_PROCBRIDGE_H
--
--#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#define PTL_IFACE_TCP 1
-#define PTL_IFACE_ER 2
-#define PTL_IFACE_SS 3
-#define PTL_IFACE_MAX 4
--
--typedef struct bridge {
-- int alive;
- nal_cb_t *nal_cb;
- lib_nal_t *lib_nal;
-- void *lower;
-- void *local;
-- void (*shutdown)(struct bridge *);
-- /* this doesn't really belong here */
-- unsigned char iptop8;
--} *bridge;
-
--
- nal_t *bridge_init(ptl_interface_t nal,
- ptl_pid_t pid_request,
- ptl_ni_limits_t *desired,
- ptl_ni_limits_t *actual,
- int *rc);
--
--typedef int (*nal_initialize)(bridge);
--extern nal_initialize nal_table[PTL_IFACE_MAX];
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* connection.c:
-- This file provides a simple stateful connection manager which
-- builds tcp connections on demand and leaves them open for
-- future use. It also provides the machinery to allow peers
-- to connect to it
--*/
--
--#include <stdlib.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--#include <table.h>
--#include <stdio.h>
--#include <stdarg.h>
--#include <string.h>
--#include <unistd.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netinet/in.h>
--#include <netinet/tcp.h>
--#include <portals/types.h>
--#include <portals/list.h>
--#include <portals/lib-types.h>
--#include <portals/socknal.h>
--#include <linux/kp30.h>
--#include <connection.h>
--#include <pthread.h>
--#include <errno.h>
--#ifndef __CYGWIN__
--#include <syscall.h>
--#endif
--
--/* global variable: acceptor port */
--unsigned short tcpnal_acceptor_port = 988;
--
--
--/* Function: compare_connection
-- * Arguments: connection c: a connection in the hash table
-- * ptl_process_id_t: an id to verify agains
-- * Returns: 1 if the connection is the one requested, 0 otherwise
-- *
-- * compare_connection() tests for collisions in the hash table
-- */
--static int compare_connection(void *arg1, void *arg2)
--{
-- connection c = arg1;
-- unsigned int * id = arg2;
--#if 0
-- return((c->ip==id[0]) && (c->port==id[1]));
--#else
-- /* CFS specific hacking */
-- return (c->ip == id[0]);
--#endif
--}
--
--
--/* Function: connection_key
-- * Arguments: ptl_process_id_t id: an id to hash
-- * Returns: a not-particularily-well-distributed hash
-- * of the id
-- */
--static unsigned int connection_key(unsigned int *id)
--{
--#if 0
-- return(id[0]^id[1]);
--#else
-- /* CFS specific hacking */
-- return (unsigned int) id[0];
--#endif
--}
--
--
--/* Function: remove_connection
-- * Arguments: c: the connection to remove
-- */
--void remove_connection(void *arg)
--{
-- connection c = arg;
-- unsigned int id[2];
--
-- id[0]=c->ip;
-- id[1]=c->port;
-- hash_table_remove(c->m->connections,id);
-- close(c->fd);
-- free(c);
--}
--
--
--/* Function: read_connection:
-- * Arguments: c: the connection to read from
-- * dest: the buffer to read into
-- * len: the number of bytes to read
-- * Returns: success as 1, or failure as 0
-- *
-- * read_connection() reads data from the connection, continuing
-- * to read partial results until the request is satisfied or
-- * it errors. TODO: this read should be covered by signal protection.
-- */
--int read_connection(connection c,
-- unsigned char *dest,
-- int len)
--{
-- int offset = 0,rc;
--
-- if (len) {
-- do {
--#ifndef __CYGWIN__
-- rc = syscall(SYS_read, c->fd, dest+offset, len-offset);
--#else
-- rc = recv(c->fd, dest+offset, len-offset, 0);
--#endif
-- if (rc <= 0) {
-- if (errno == EINTR) {
-- rc = 0;
-- } else {
-- remove_connection(c);
-- return (0);
-- }
-- }
-- offset += rc;
-- } while (offset < len);
-- }
-- return (1);
--}
--
--static int connection_input(void *d)
--{
-- connection c = d;
-- return((*c->m->handler)(c->m->handler_arg,c));
--}
--
--
--/* Function: allocate_connection
-- * Arguments: t: tcpnal the allocation is occuring in the context of
-- * dest: portal endpoint address for this connection
-- * fd: open file descriptor for the socket
-- * Returns: an allocated connection structure
-- *
-- * just encompasses the action common to active and passive
-- * connections of allocation and placement in the global table
-- */
--static connection allocate_connection(manager m,
-- unsigned int ip,
-- unsigned short port,
-- int fd)
--{
-- connection c=malloc(sizeof(struct connection));
-- unsigned int id[2];
-- c->m=m;
-- c->fd=fd;
-- c->ip=ip;
-- c->port=port;
-- id[0]=ip;
-- id[1]=port;
-- register_io_handler(fd,READ_HANDLER,connection_input,c);
-- hash_table_insert(m->connections,c,id);
-- return(c);
--}
--
--
--/* Function: new_connection
-- * Arguments: t: opaque argument holding the tcpname
-- * Returns: 1 in order to reregister for new connection requests
-- *
-- * called when the bound service socket recieves
-- * a new connection request, it always accepts and
-- * installs a new connection
-- */
--static int new_connection(void *z)
--{
-- manager m=z;
-- struct sockaddr_in s;
-- int len=sizeof(struct sockaddr_in);
-- int fd=accept(m->bound,(struct sockaddr *)&s,&len);
-- unsigned int nid=*((unsigned int *)&s.sin_addr);
-- /* cfs specific hack */
-- //unsigned short pid=s.sin_port;
-- pthread_mutex_lock(&m->conn_lock);
-- allocate_connection(m,htonl(nid),0/*pid*/,fd);
-- pthread_mutex_unlock(&m->conn_lock);
-- return(1);
--}
-
- /* FIXME assuming little endian, cleanup!! */
- #define __cpu_to_le64(x) ((__u64)(x))
- #define __le64_to_cpu(x) ((__u64)(x))
- #define __cpu_to_le32(x) ((__u32)(x))
- #define __le32_to_cpu(x) ((__u32)(x))
- #define __cpu_to_le16(x) ((__u16)(x))
- #define __le16_to_cpu(x) ((__u16)(x))
--
--extern ptl_nid_t tcpnal_mynid;
--
--int
--tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
--{
-- int rc;
- int nob;
-- ptl_hdr_t hdr;
-- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
--
-- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
--
-- memset (&hdr, 0, sizeof (hdr));
- hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
- hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR);
- hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR);
- hmv->magic = cpu_to_le32(PORTALS_PROTO_MAGIC);
- hmv->version_major = cpu_to_le32(PORTALS_PROTO_VERSION_MAJOR);
- hmv->version_minor = cpu_to_le32(PORTALS_PROTO_VERSION_MINOR);
--
- hdr.src_nid = __cpu_to_le64 (tcpnal_mynid);
- hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
- hdr.src_nid = cpu_to_le64(tcpnal_mynid);
- hdr.type = cpu_to_le32(PTL_MSG_HELLO);
--
- hdr.msg.hello.type = __cpu_to_le32 (type);
- hdr.msg.hello.incarnation = 0;
- hdr.msg.hello.type = cpu_to_le32(type);
- hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
-
- /* I don't send any interface info */
--
-- /* Assume sufficient socket buffering for this message */
-- rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
-- if (rc <= 0) {
-- CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
-- return (rc);
-- }
--
-- rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
-- if (rc <= 0) {
-- CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
-- return (rc);
-- }
--
- if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- if (hmv->magic != le32_to_cpu(PORTALS_PROTO_MAGIC)) {
-- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
- __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid);
- cpu_to_le32(hmv->magic), PORTALS_PROTO_MAGIC, *nid);
-- return (-EPROTO);
-- }
--
- if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
- hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
- if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
- hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
-- CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
-- " from "LPX64"\n",
- __le16_to_cpu (hmv->version_major),
- __le16_to_cpu (hmv->version_minor),
- le16_to_cpu (hmv->version_major),
- le16_to_cpu (hmv->version_minor),
-- PORTALS_PROTO_VERSION_MAJOR,
-- PORTALS_PROTO_VERSION_MINOR,
-- *nid);
-- return (-EPROTO);
-- }
--
- #if (PORTALS_PROTO_VERSION_MAJOR != 0)
- # error "This code only understands protocol version 0.x"
-#if (PORTALS_PROTO_VERSION_MAJOR != 1)
-# error "This code only understands protocol version 1.x"
--#endif
- /* version 0 sends magic/version as the dest_nid of a 'hello' header,
- /* version 1 sends magic/version as the dest_nid of a 'hello' header,
-- * so read the rest of it in now... */
--
-- rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
-- if (rc <= 0) {
-- CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
-- rc, *nid);
-- return (rc);
-- }
--
-- /* ...and check we got what we expected */
- if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) ||
- hdr.payload_length != __cpu_to_le32 (0)) {
- CERROR ("Expecting a HELLO hdr with 0 payload,"
- if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
- CERROR ("Expecting a HELLO hdr "
-- " but got type %d with %d payload from "LPX64"\n",
- __le32_to_cpu (hdr.type),
- __le32_to_cpu (hdr.payload_length), *nid);
- le32_to_cpu (hdr.type),
- le32_to_cpu (hdr.payload_length), *nid);
-- return (-EPROTO);
-- }
--
- if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
- if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
-- CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
-- return (-EPROTO);
-- }
--
-- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
- *nid = __le64_to_cpu(hdr.src_nid);
- } else if (*nid != __le64_to_cpu (hdr.src_nid)) {
- *nid = le64_to_cpu(hdr.src_nid);
- } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-- CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
- __le64_to_cpu (hdr.src_nid), *nid);
- le64_to_cpu (hdr.src_nid), *nid);
- return (-EPROTO);
- }
-
- /* Ignore any interface info in the payload */
- nob = le32_to_cpu(hdr.payload_length);
- if (nob > getpagesize()) {
- CERROR("Unexpected HELLO payload %d from "LPX64"\n",
- nob, *nid);
-- return (-EPROTO);
- }
- if (nob > 0) {
- char *space = (char *)malloc(nob);
-
- if (space == NULL) {
- CERROR("Can't allocate scratch buffer %d\n", nob);
- return (-ENOMEM);
- }
-
- rc = syscall(SYS_read, sockfd, space, nob);
- if (rc <= 0) {
- CERROR("Error %d skipping HELLO payload from "
- LPX64"\n", rc, *nid);
- return (rc);
- }
-- }
--
-- return (0);
--}
--
--/* Function: force_tcp_connection
-- * Arguments: t: tcpnal
-- * dest: portals endpoint for the connection
-- * Returns: an allocated connection structure, either
-- * a pre-existing one, or a new connection
-- */
--connection force_tcp_connection(manager m,
-- unsigned int ip,
-- unsigned short port,
-- procbridge pb)
--{
-- connection conn;
-- struct sockaddr_in addr;
- struct sockaddr_in locaddr;
-- unsigned int id[2];
-- struct timeval tv;
-- __u64 incarnation;
--
- int fd;
- int option;
- int rc;
- int rport;
- ptl_nid_t peernid = PTL_NID_ANY;
- port = tcpnal_acceptor_port;
--
-- id[0] = ip;
-- id[1] = port;
--
-- pthread_mutex_lock(&m->conn_lock);
--
-- conn = hash_table_find(m->connections, id);
- if (conn)
- goto out;
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
- if (!conn) {
- int fd;
- int option;
- ptl_nid_t peernid = PTL_NID_ANY;
--
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_addr.s_addr = INADDR_ANY;
- bzero((char *) &addr, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
--
- for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- perror("tcpnal socket failed");
- goto out;
- }
-
- option = 1;
- rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
- &option, sizeof(option));
- if (rc != 0) {
- perror ("Can't set SO_REUSEADDR for socket");
- close(fd);
- goto out;
- }
- if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
- perror("tcpnal socket failed");
- exit(-1);
- }
- if (connect(fd, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in))) {
- perror("tcpnal connect");
- return(0);
- }
--
- locaddr.sin_port = htons(rport);
- rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc == 0 || errno == EACCES) {
- rc = connect(fd, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in));
- if (rc == 0) {
- break;
- } else if (errno != EADDRINUSE) {
- perror("Error connecting to remote host");
- close(fd);
- goto out;
- }
- } else if (errno != EADDRINUSE) {
- perror("Error binding to privileged port");
- close(fd);
- goto out;
- }
- close(fd);
- }
-
- if (rport == IPPORT_RESERVED / 2) {
- fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
- goto out;
- }
-
--#if 1
- option = 1;
- setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
- option = 1;
- setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
--#endif
--
- gettimeofday(&tv, NULL);
- incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- gettimeofday(&tv, NULL);
- incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
--
- /* say hello */
- if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
- /* say hello */
- if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
-- exit(-1);
-
- conn = allocate_connection(m, ip, port, fd);
-
- /* let nal thread know this event right away */
- if (conn)
- procbridge_wakeup_nal(pb);
--
- out:
- conn = allocate_connection(m, ip, port, fd);
-
- /* let nal thread know this event right away */
- if (conn)
- procbridge_wakeup_nal(pb);
- }
-
-- pthread_mutex_unlock(&m->conn_lock);
-- return (conn);
--}
-
--
--/* Function: bind_socket
-- * Arguments: t: the nal state for this interface
-- * port: the port to attempt to bind to
-- * Returns: 1 on success, or 0 on error
-- *
-- * bind_socket() attempts to allocate and bind a socket to the requested
-- * port, or dynamically assign one from the kernel should the port be
-- * zero. Sets the bound and bound_handler elements of m.
-- *
-- * TODO: The port should be an explicitly sized type.
-- */
--static int bind_socket(manager m,unsigned short port)
--{
-- struct sockaddr_in addr;
-- int alen=sizeof(struct sockaddr_in);
--
-- if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0)
-- return(0);
--
-- bzero((char *) &addr, sizeof(addr));
-- addr.sin_family = AF_INET;
-- addr.sin_addr.s_addr = 0;
-- addr.sin_port = htons(port);
--
-- if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){
-- perror ("tcpnal bind");
-- return(0);
-- }
--
-- getsockname(m->bound,(struct sockaddr *)&addr, &alen);
--
-- m->bound_handler=register_io_handler(m->bound,READ_HANDLER,
-- new_connection,m);
-- listen(m->bound,5);
-- m->port=addr.sin_port;
-- return(1);
--}
--
--
--/* Function: shutdown_connections
-- * Arguments: m: the manager structure
-- *
-- * close all connections and reclaim resources
-- */
--void shutdown_connections(manager m)
--{
-- close(m->bound);
-- remove_io_handler(m->bound_handler);
-- hash_destroy_table(m->connections,remove_connection);
-- free(m);
--}
--
--
--/* Function: init_connections
-- * Arguments: t: the nal state for this interface
-- * port: the port to attempt to bind to
-- * Returns: a newly allocated manager structure, or
-- * zero if the fixed port could not be bound
-- */
--manager init_connections(unsigned short pid,
-- int (*input)(void *, void *),
-- void *a)
--{
-- manager m = (manager)malloc(sizeof(struct manager));
-- m->connections = hash_create_table(compare_connection,connection_key);
-- m->handler = input;
-- m->handler_arg = a;
-- pthread_mutex_init(&m->conn_lock, 0);
--
-- if (bind_socket(m,pid))
-- return(m);
--
-- free(m);
-- return(0);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#include <table.h>
--#include <procbridge.h>
--
--typedef struct manager {
-- table connections;
-- pthread_mutex_t conn_lock; /* protect connections table */
-- int bound;
-- io_handler bound_handler;
-- int (*handler)(void *, void *);
-- void *handler_arg;
-- unsigned short port;
--} *manager;
--
--
--typedef struct connection {
-- unsigned int ip;
-- unsigned short port;
-- int fd;
-- manager m;
--} *connection;
--
--connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
-- procbridge pb);
--manager init_connections(unsigned short, int (*f)(void *, void *), void *);
--void remove_connection(void *arg);
--void shutdown_connections(manager m);
--int read_connection(connection c, unsigned char *dest, int len);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- * Author: Phil Schwan <phil@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include <stdio.h>
--#include <fcntl.h>
--#include <errno.h>
--#include <stdarg.h>
--#include <sys/time.h>
--
--int smp_processor_id = 1;
--char debug_file_path[1024] = "/tmp/lustre-log";
--char debug_file_name[1024];
--FILE *debug_file_fd;
--
--int portals_do_debug_dumplog(void *arg)
--{
-- printf("Look in %s\n", debug_file_name);
-- return 0;
--}
--
--
--void portals_debug_print(void)
--{
-- return;
--}
--
--
--void portals_debug_dumplog(void)
--{
-- printf("Look in %s\n", debug_file_name);
-- return;
--}
--
--
--int portals_debug_init(unsigned long bufsize)
--{
-- debug_file_fd = stdout;
-- return 0;
--}
--
--int portals_debug_cleanup(void)
--{
-- return 0; //close(portals_debug_fd);
--}
--
--int portals_debug_clear_buffer(void)
--{
-- return 0;
--}
--
--int portals_debug_mark_buffer(char *text)
--{
--
-- fprintf(debug_file_fd, "*******************************************************************************\n");
-- fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
-- fprintf(debug_file_fd, "*******************************************************************************\n");
--
-- return 0;
--}
--
--int portals_debug_copy_to_user(char *buf, unsigned long len)
--{
-- return 0;
--}
--
--/* FIXME: I'm not very smart; someone smarter should make this better. */
--void
--portals_debug_msg (int subsys, int mask, char *file, const char *fn,
-- const int line, const char *format, ...)
--{
-- va_list ap;
-- unsigned long flags;
-- struct timeval tv;
-- int nob;
--
--
-- /* NB since we pass a non-zero sized buffer (at least) on the first
-- * print, we can be assured that by the end of all the snprinting,
-- * we _do_ have a terminated buffer, even if our message got truncated.
-- */
--
-- gettimeofday(&tv, NULL);
--
-- nob += fprintf(debug_file_fd,
-- "%02x:%06x:%d:%lu.%06lu ",
-- subsys >> 24, mask, smp_processor_id,
-- tv.tv_sec, tv.tv_usec);
--
-- nob += fprintf(debug_file_fd,
-- "(%s:%d:%s() %d+%ld): ",
-- file, line, fn, 0,
-- 8192 - ((unsigned long)&flags & 8191UL));
--
-- va_start (ap, format);
-- nob += fprintf(debug_file_fd, format, ap);
-- va_end (ap);
--
--
--}
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--/* this file is only called dispatch.h to prevent it
-- from colliding with /usr/include/sys/select.h */
--
--typedef struct io_handler *io_handler;
--
--struct io_handler{
-- io_handler *last;
-- io_handler next;
-- int fd;
-- int type;
-- int (*function)(void *);
-- void *argument;
-- int disabled;
--};
--
--
--#define READ_HANDLER 1
--#define WRITE_HANDLER 2
--#define EXCEPTION_HANDLER 4
--#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER)
--
--io_handler register_io_handler(int fd,
-- int type,
-- int (*function)(void *),
-- void *arg);
--
--void remove_io_handler (io_handler i);
--void init_unix_timer(void);
--void select_timer_block(when until);
--when now(void);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#define DIRECT_IP_MODE
--#ifdef DIRECT_IP_MODE
--#define PNAL_NID(in_addr, port) (in_addr)
--#define PNAL_PID(pid) (pid)
--#define PNAL_IP(in_addr, port) (in_addr)
--#define PNAL_PORT(nid, pid) (pid)
--#else
--
--#define PNAL_BASE_PORT 4096
--#define PNAL_HOSTID_SHIFT 24
--#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
--#define PNAL_VNODE_SHIFT 8
--#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
--#define PNAL_PID_SHIFT 8
--#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
--
--#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
-- << PNAL_VNODE_SHIFT) \
-- | (((ntohs(port)-PNAL_BASE_PORT) >>\
-- PNAL_PID_SHIFT)))
--#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
--
--#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
-- >> PNAL_VNODE_SHIFT)\
-- | (t->iptop8 << PNAL_HOSTID_SHIFT)))
--#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
-- << PNAL_VNODE_SHIFT) \
-- | ((pid) & PNAL_PID_MASK)) \
-- + PNAL_BASE_PORT))
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* timer.c:
-- * this file implements a simple priority-queue based timer system. when
-- * combined with a file which implements now() and block(), it can
-- * be used to provide course-grained time-based callbacks.
-- */
--
--#include <pqtimer.h>
--#include <stdlib.h>
--#include <string.h>
--
--struct timer {
-- void (*function)(void *);
-- void *arg;
-- when w;
-- int interval;
-- int disable;
--};
--
--typedef struct thunk *thunk;
--struct thunk {
-- void (*f)(void *);
-- void *a;
-- thunk next;
--};
--
--extern when now(void);
--
--static thunk thunks;
--static int internal;
--static void (*block_function)(when);
--static int number_of_timers;
--static int size_of_pqueue;
--static timer *timers;
--
--
--static void heal(int where)
--{
-- int left=(where<<1);
-- int right=(where<<1)+1;
-- int min=where;
-- timer temp;
--
-- if (left <= number_of_timers)
-- if (timers[left]->w < timers[min]->w) min=left;
-- if (right <= number_of_timers)
-- if (timers[right]->w < timers[min]->w) min=right;
-- if (min != where){
-- temp=timers[where];
-- timers[where]=timers[min];
-- timers[min]=temp;
-- heal(min);
-- }
--}
--
--static void add_pqueue(int i)
--{
-- timer temp;
-- int parent=(i>>1);
-- if ((i>1) && (timers[i]->w< timers[parent]->w)){
-- temp=timers[i];
-- timers[i]=timers[parent];
-- timers[parent]=temp;
-- add_pqueue(parent);
-- }
--}
--
--static void add_timer(timer t)
--{
-- if (size_of_pqueue<(number_of_timers+2)){
-- int oldsize=size_of_pqueue;
-- timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10));
-- memcpy(new,timers,sizeof(timer)*oldsize);
-- timers=new;
-- }
-- timers[++number_of_timers]=t;
-- add_pqueue(number_of_timers);
--}
--
--/* Function: register_timer
-- * Arguments: interval: the time interval from the current time when
-- * the timer function should be called
-- * function: the function to call when the time has expired
-- * argument: the argument to call it with.
-- * Returns: a pointer to a timer structure
-- */
--timer register_timer(when interval,
-- void (*function)(void *),
-- void *argument)
--{
-- timer t=(timer)malloc(sizeof(struct timer));
--
-- t->arg=argument;
-- t->function=function;
-- t->interval=interval;
-- t->disable=0;
-- t->w=now()+interval;
-- add_timer(t);
-- if (!internal && (number_of_timers==1))
-- block_function(t->w);
-- return(t);
--}
--
--/* Function: remove_timer
-- * Arguments: t:
-- * Returns: nothing
-- *
-- * remove_timer removes a timer from the system, insuring
-- * that it will never be called. It does not actually
-- * free the timer due to reentrancy issues.
-- */
--
--void remove_timer(timer t)
--{
-- t->disable=1;
--}
--
--
--
--void timer_fire()
--{
-- timer current;
--
-- current=timers[1];
-- timers[1]=timers[number_of_timers--];
-- heal(1);
-- if (!current->disable) {
-- (*current->function)(current->arg);
-- }
-- free(current);
--}
--
--when next_timer(void)
--{
-- when here=now();
--
-- while (number_of_timers && (timers[1]->w <= here)) timer_fire();
-- if (number_of_timers) return(timers[1]->w);
-- return(0);
--}
--
--/* Function: timer_loop
-- * Arguments: none
-- * Returns: never
-- *
-- * timer_loop() is the blocking dispatch function for the timer.
-- * Is calls the block() function registered with init_timer,
-- * and handles associated with timers that have been registered.
-- */
--void timer_loop()
--{
-- when here;
--
-- while (1){
-- thunk z;
-- here=now();
--
-- for (z=thunks;z;z=z->next) (*z->f)(z->a);
--
-- if (number_of_timers){
-- if (timers[1]->w > here){
-- (*block_function)(timers[1]->w);
-- } else {
-- timer_fire();
-- }
-- } else {
-- thunk z;
-- for (z=thunks;z;z=z->next) (*z->f)(z->a);
-- (*block_function)(0);
-- }
-- }
--}
--
--
--/* Function: register_thunk
-- * Arguments: f: the function to call
-- * a: the single argument to call it with
-- *
-- * Thunk functions get called at irregular intervals, they
-- * should not assume when, or take a particularily long
-- * amount of time. Thunks are for background cleanup tasks.
-- */
--void register_thunk(void (*f)(void *),void *a)
--{
-- thunk t=(void *)malloc(sizeof(struct thunk));
-- t->f=f;
-- t->a=a;
-- t->next=thunks;
-- thunks=t;
--}
--
--/* Function: initialize_timer
-- * Arguments: block: the function to call to block for the specified interval
-- *
-- * initialize_timer() must be called before any other timer function,
-- * including timer_loop.
-- */
--void initialize_timer(void (*block)(when))
--{
-- block_function=block;
-- number_of_timers=0;
-- size_of_pqueue=10;
-- timers=(timer *)malloc(sizeof(timer)*size_of_pqueue);
-- thunks=0;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--typedef unsigned long long when;
--when now(void);
--typedef struct timer *timer;
--timer register_timer(when interval,
-- void (*function)(void *),
-- void *argument);
--timer register_timer_wait(void);
--void remove_timer(timer);
--void timer_loop(void);
--void initialize_timer(void (*block)(when));
--void timer_fire(void);
--
--
--#define HZ 0x100000000ull
--
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* api.c:
-- * This file provides the 'api' side for the process-based nals.
-- * it is responsible for creating the 'library' side thread,
-- * and passing wrapped portals transactions to it.
-- *
-- * Along with initialization, shutdown, and transport to the library
-- * side, this file contains some stubs to satisfy the nal definition.
-- */
--#include <stdio.h>
--#include <stdlib.h>
--#include <unistd.h>
--#include <string.h>
--#ifndef __CYGWIN__
--#include <syscall.h>
--#endif
--#include <sys/socket.h>
--#include <procbridge.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--#include <errno.h>
--
--
--/* XXX CFS workaround, to give a chance to let nal thread wake up
-- * from waiting in select
-- */
--static int procbridge_notifier_handler(void *arg)
--{
-- static char buf[8];
-- procbridge p = (procbridge) arg;
--
-- syscall(SYS_read, p->notifier[1], buf, sizeof(buf));
-- return 1;
--}
--
--void procbridge_wakeup_nal(procbridge p)
--{
-- static char buf[8];
-- syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
- }
-
- /* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- * id: the command to pass to the lower layer
- * args, args_len:pointer to and length of the request
- * ret, ret_len: pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- * side, and collects the result
- */
- static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
- {
- bridge b = (bridge) n->nal_data;
-
- if (id == PTL_FINI) {
- lib_fini(b->nal_cb);
-
- if (b->shutdown)
- (*b->shutdown)(b);
- }
-
- lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
- return (PTL_OK);
--}
-
--
--/* Function: shutdown
-- * Arguments: nal: a pointer to my top side nal structure
-- * ni: my network interface index
-- *
-- * cleanup nal state, reclaim the lower side thread and
-- * its state using PTL_FINI codepoint
-- */
- static int procbridge_shutdown(nal_t *n, int ni)
-static void procbridge_shutdown(nal_t *n)
--{
- bridge b=(bridge)n->nal_data;
- lib_nal_t *nal = n->nal_data;
- bridge b=(bridge)nal->libnal_data;
-- procbridge p=(procbridge)b->local;
--
-- p->nal_flags |= NAL_FLAG_STOPPING;
-- procbridge_wakeup_nal(p);
--
-- do {
-- pthread_mutex_lock(&p->mutex);
-- if (p->nal_flags & NAL_FLAG_STOPPED) {
-- pthread_mutex_unlock(&p->mutex);
-- break;
-- }
-- pthread_cond_wait(&p->cond, &p->mutex);
-- pthread_mutex_unlock(&p->mutex);
-- } while (1);
--
-- free(p);
- return(0);
- }
-
-
- /* Function: validate
- * useless stub
- */
- static int procbridge_validate(nal_t *nal, void *base, size_t extent)
- {
- return(0);
--}
-
-
- /* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
- int __tcpnal_eqwait_timeout_value = 0;
- int __tcpnal_eqwait_timedout = 0;
-
- /* Function: yield
- * Arguments: pid:
- *
- * this function was originally intended to allow the
- * lower half thread to be scheduled to allow progress. we
- * overload it to explicitly block until signalled by the
- * lower half.
- */
- static void procbridge_yield(nal_t *n)
- {
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_lock(&p->mutex);
- if (!__tcpnal_eqwait_timeout_value) {
- pthread_cond_wait(&p->cond,&p->mutex);
- } else {
- struct timeval now;
- struct timespec timeout;
-
- gettimeofday(&now, NULL);
- timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
- timeout.tv_nsec = now.tv_usec * 1000;
--
- __tcpnal_eqwait_timedout =
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
- }
- pthread_mutex_unlock(&p->mutex);
- }
--
-/* forward decl */
-extern int procbridge_startup (nal_t *, ptl_pid_t,
- ptl_ni_limits_t *, ptl_ni_limits_t *);
--
- static void procbridge_lock(nal_t * nal, unsigned long *flags){}
- static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
--/* api_nal
-- * the interface vector to allow the generic code to access
- * this nal. this is seperate from the library side nal_cb.
- * this nal. this is seperate from the library side lib_nal.
-- * TODO: should be dyanmically allocated
-- */
- static nal_t api_nal = {
- ni: {0},
-nal_t procapi_nal = {
-- nal_data: NULL,
- forward: procbridge_forward,
- shutdown: procbridge_shutdown,
- validate: procbridge_validate,
- yield: procbridge_yield,
- lock: procbridge_lock,
- unlock: procbridge_unlock
- nal_ni_init: procbridge_startup,
- nal_ni_fini: procbridge_shutdown,
--};
--
--ptl_nid_t tcpnal_mynid;
--
- /* Function: procbridge_interface
-/* Function: procbridge_startup
-- *
-- * Arguments: pid: requested process id (port offset)
-- * PTL_ID_ANY not supported.
-- * desired: limits passed from the application
-- * and effectively ignored
-- * actual: limits actually allocated and returned
-- *
- * Returns: a pointer to my statically allocated top side NAL
- * structure
- * Returns: portals rc
-- *
-- * initializes the tcp nal. we define unix_failure as an
-- * error wrapper to cut down clutter.
-- */
- nal_t *procbridge_interface(int num_interface,
- ptl_pt_index_t ptl_size,
- ptl_ac_index_t acl_size,
- ptl_pid_t requested_pid)
-int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
-- nal_init_args_t args;
-
-- procbridge p;
-- bridge b;
- static int initialized=0;
- ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
- /* XXX nal_type is purely private to tcpnal here */
-- int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */
--
- if(initialized) return (&api_nal);
- LASSERT(nal == &procapi_nal);
--
-- init_unix_timer();
--
-- b=(bridge)malloc(sizeof(struct bridge));
-- p=(procbridge)malloc(sizeof(struct procbridge));
- api_nal.nal_data=b;
-- b->local=p;
-
- if (ptl_size)
- limits.max_ptable_index = ptl_size;
- if (acl_size)
- limits.max_atable_index = acl_size;
--
-- args.nia_requested_pid = requested_pid;
- args.nia_limits = &limits;
- args.nia_requested_limits = requested_limits;
- args.nia_actual_limits = actual_limits;
-- args.nia_nal_type = nal_type;
-- args.nia_bridge = b;
- args.nia_apinal = nal;
--
-- /* init procbridge */
-- pthread_mutex_init(&p->mutex,0);
-- pthread_cond_init(&p->cond, 0);
-- p->nal_flags = 0;
- pthread_mutex_init(&p->nal_cb_lock, 0);
--
-- /* initialize notifier */
-- if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
-- perror("socketpair failed");
- return NULL;
- return PTL_FAIL;
-- }
--
-- if (!register_io_handler(p->notifier[1], READ_HANDLER,
-- procbridge_notifier_handler, p)) {
-- perror("fail to register notifier handler");
- return NULL;
- return PTL_FAIL;
-- }
--
-- /* create nal thread */
-- if (pthread_create(&p->t, NULL, nal_thread, &args)) {
-- perror("nal_init: pthread_create");
- return(NULL);
- return PTL_FAIL;
-- }
--
-- do {
-- pthread_mutex_lock(&p->mutex);
-- if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) {
-- pthread_mutex_unlock(&p->mutex);
-- break;
-- }
-- pthread_cond_wait(&p->cond, &p->mutex);
-- pthread_mutex_unlock(&p->mutex);
-- } while (1);
--
-- if (p->nal_flags & NAL_FLAG_STOPPED)
- return (NULL);
- return PTL_FAIL;
--
- b->nal_cb->ni.nid = tcpnal_mynid;
- initialized = 1;
- b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
--
- return (&api_nal);
- return PTL_OK;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#ifndef _PROCBRIDGE_H_
--#define _PROCBRIDGE_H_
--
--#include <pthread.h>
--#include <bridge.h>
--#include <ipmap.h>
--
--
--#define NAL_FLAG_RUNNING 1
--#define NAL_FLAG_STOPPING 2
--#define NAL_FLAG_STOPPED 4
--
--typedef struct procbridge {
-- /* sync between user threads and nal thread */
-- pthread_t t;
-- pthread_cond_t cond;
-- pthread_mutex_t mutex;
--
-- /* socket pair used to notify nal thread */
-- int notifier[2];
--
-- int nal_flags;
--
- pthread_mutex_t nal_cb_lock;
--} *procbridge;
--
--typedef struct nal_init_args {
-- ptl_pid_t nia_requested_pid;
- ptl_ni_limits_t *nia_limits;
- ptl_ni_limits_t *nia_requested_limits;
- ptl_ni_limits_t *nia_actual_limits;
-- int nia_nal_type;
-- bridge nia_bridge;
- nal_t *nia_apinal;
--} nal_init_args_t;
--
--extern void *nal_thread(void *);
--
--
--#define PTL_INIT (LIB_MAX_DISPATCH+1)
--#define PTL_FINI (LIB_MAX_DISPATCH+2)
--
--#define MAX_ACLS 1
--#define MAX_PTLS 128
--
--extern void set_address(bridge t,ptl_pid_t pidrequest);
- extern nal_t *procbridge_interface(int num_interface,
- ptl_pt_index_t ptl_size,
- ptl_ac_index_t acl_size,
- ptl_pid_t requested_pid);
--extern void procbridge_wakeup_nal(procbridge p);
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* lib.c:
-- * This file provides the 'library' side for the process-based nals.
-- * it is responsible for communication with the 'api' side and
-- * providing service to the generic portals 'library'
-- * implementation. 'library' might be better termed 'communication'
-- * or 'kernel'.
-- */
--
--#include <stdlib.h>
--#include <stdio.h>
--#include <stdarg.h>
--#include <unistd.h>
--#include <procbridge.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netdb.h>
--#include <errno.h>
--#include <timer.h>
--#include <dispatch.h>
--
--/* the following functions are stubs to satisfy the nal definition
-- without doing anything particularily useful*/
-
- static ptl_err_t nal_write(nal_cb_t *nal,
- void *private,
- user_ptr dst_addr,
- void *src_addr,
- size_t len)
- {
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
- }
-
- static ptl_err_t nal_read(nal_cb_t * nal,
- void *private,
- void *dst_addr,
- user_ptr src_addr,
- size_t len)
- {
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
- }
-
- static void *nal_malloc(nal_cb_t *nal,
- size_t len)
- {
- void *buf = malloc(len);
- return buf;
- }
-
- static void nal_free(nal_cb_t *nal,
- void *buf,
- size_t len)
- {
- free(buf);
- }
-
- static void nal_printf(nal_cb_t *nal,
- const char *fmt,
- ...)
- {
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
- }
-
-
- static void nal_cli(nal_cb_t *nal,
- unsigned long *flags)
- {
- bridge b = (bridge) nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_lock(&p->nal_cb_lock);
- }
-
-
- static void nal_sti(nal_cb_t *nal,
- unsigned long *flags)
- {
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_unlock(&p->nal_cb_lock);
- }
-
--
- static int nal_dist(nal_cb_t *nal,
-static int nal_dist(lib_nal_t *nal,
-- ptl_nid_t nid,
-- unsigned long *dist)
--{
-- return 0;
--}
--
- static void wakeup_topside(void *z)
-static void check_stopping(void *z)
--{
-- bridge b = z;
-- procbridge p = b->local;
- int stop;
--
- if ((p->nal_flags & NAL_FLAG_STOPPING) == 0)
- return;
-
-- pthread_mutex_lock(&p->mutex);
- stop = p->nal_flags & NAL_FLAG_STOPPING;
- if (stop)
- p->nal_flags |= NAL_FLAG_STOPPED;
- p->nal_flags |= NAL_FLAG_STOPPED;
-- pthread_cond_broadcast(&p->cond);
-- pthread_mutex_unlock(&p->mutex);
--
- if (stop)
- pthread_exit(0);
- pthread_exit(0);
--}
--
--
--/* Function: nal_thread
-- * Arguments: z: an opaque reference to a nal control structure
-- * allocated and partially populated by the api level code
-- * Returns: nothing, and only on error or explicit shutdown
-- *
-- * This function is the entry point of the pthread initiated on
-- * the api side of the interface. This thread is used to handle
-- * asynchronous delivery to the application.
-- *
-- * We define a limit macro to place a ceiling on limits
-- * for syntactic convenience
-- */
- #define LIMIT(x,y,max)\
- if ((unsigned int)x > max) y = max;
-
--extern int tcpnal_init(bridge);
--
--nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0};
--
--void *nal_thread(void *z)
--{
-- nal_init_args_t *args = (nal_init_args_t *) z;
-- bridge b = args->nia_bridge;
-- procbridge p=b->local;
-- int rc;
- ptl_pid_t pid_request;
- ptl_process_id_t process_id;
-- int nal_type;
- ptl_ni_limits_t desired;
- ptl_ni_limits_t actual;
--
- b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
- b->nal_cb->nal_data=b;
- b->nal_cb->cb_read=nal_read;
- b->nal_cb->cb_write=nal_write;
- b->nal_cb->cb_malloc=nal_malloc;
- b->nal_cb->cb_free=nal_free;
- b->nal_cb->cb_map=NULL;
- b->nal_cb->cb_unmap=NULL;
- b->nal_cb->cb_printf=nal_printf;
- b->nal_cb->cb_cli=nal_cli;
- b->nal_cb->cb_sti=nal_sti;
- b->nal_cb->cb_dist=nal_dist;
- b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
- b->lib_nal->libnal_data=b;
- b->lib_nal->libnal_map=NULL;
- b->lib_nal->libnal_unmap=NULL;
- b->lib_nal->libnal_dist=nal_dist;
--
- pid_request = args->nia_requested_pid;
- desired = *args->nia_limits;
-- nal_type = args->nia_nal_type;
-
- actual = desired;
- LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES);
- LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS);
- LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS);
- LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS);
- LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS);
--
- set_address(b,pid_request);
- /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
- * lib_init() is about to do from the process_id passed to it...*/
- set_address(b,args->nia_requested_pid);
--
- process_id = b->lib_nal->libnal_ni.ni_pid;
-
-- if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
-- /* initialize the generic 'library' level code */
--
- rc = lib_init(b->nal_cb,
- b->nal_cb->ni.nid,
- b->nal_cb->ni.pid,
- 10,
- actual.max_ptable_index,
- actual.max_atable_index);
- rc = lib_init(b->lib_nal, args->nia_apinal,
- process_id,
- args->nia_requested_limits,
- args->nia_actual_limits);
--
-- /*
-- * Whatever the initialization returned is passed back to the
-- * user level code for further interpretation. We just exit if
-- * it is non-zero since something went wrong.
-- */
-- /* this should perform error checking */
-- pthread_mutex_lock(&p->mutex);
- p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING;
- p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING;
-- pthread_cond_broadcast(&p->cond);
-- pthread_mutex_unlock(&p->mutex);
--
- if (!rc) {
- if (rc == PTL_OK) {
-- /* the thunk function is called each time the timer loop
-- performs an operation and returns to blocking mode. we
-- overload this function to inform the api side that
-- it may be interested in looking at the event queue */
- register_thunk(wakeup_topside,b);
- register_thunk(check_stopping,b);
-- timer_loop();
-- }
-- return(0);
--}
- #undef LIMIT
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* select.c:
-- * Provides a general mechanism for registering and dispatching
-- * io events through the select system call.
-- */
--
--#ifdef sun
--#include <sys/filio.h>
--#else
--#include <sys/ioctl.h>
--#endif
--
--#include <sys/time.h>
--#include <sys/types.h>
--#include <stdlib.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--
--
--static struct timeval beginning_of_epoch;
--static io_handler io_handlers;
--
--/* Function: now
-- *
-- * Return: the current time in canonical units: a 64 bit number
-- * where the most significant 32 bits contains the number
-- * of seconds, and the least signficant a count of (1/(2^32))ths
-- * of a second.
-- */
--when now()
--{
-- struct timeval result;
--
-- gettimeofday(&result,0);
-- return((((unsigned long long)result.tv_sec)<<32)|
-- (((unsigned long long)result.tv_usec)<<32)/1000000);
--}
--
--
--/* Function: register_io_handler
-- * Arguments: fd: the file descriptor of interest
-- * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER
-- * function: a function to call when io is available on fd
-- * arg: an opaque correlator to return to the handler
-- * Returns: a pointer to the io_handler structure
-- */
--io_handler register_io_handler(int fd,
-- int type,
-- int (*function)(void *),
-- void *arg)
--{
-- io_handler i=(io_handler)malloc(sizeof(struct io_handler));
-- if ((i->fd=fd)>=0){
-- i->type=type;
-- i->function=function;
-- i->argument=arg;
-- i->disabled=0;
-- i->last=&io_handlers;
-- if ((i->next=io_handlers)) i->next->last=&i->next;
-- io_handlers=i;
-- }
-- return(i);
--}
--
--/* Function: remove_io_handler
-- * Arguments: i: a pointer to the handler to stop servicing
-- *
-- * remove_io_handler() doesn't actually free the handler, due
-- * to reentrancy problems. it just marks the handler for
-- * later cleanup by the blocking function.
-- */
--void remove_io_handler (io_handler i)
--{
-- i->disabled=1;
--}
--
--static void set_flag(io_handler n,fd_set *fds)
--{
-- if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]);
-- if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]);
-- if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]);
--}
--
--
--/* Function: select_timer_block
-- * Arguments: until: an absolute time when the select should return,
-- *                   or zero to block until a descriptor becomes ready
-- *
-- * Frees the handlers that were marked disabled, select()s on the
-- * descriptors of the remaining ones, and calls the handler function of
-- * every descriptor the kernel reports as ready.  A handler function
-- * that returns zero is marked disabled and is freed on a later call.
-- */
--void select_timer_block(when until)
--{
--        fd_set fds[3];
--        struct timeval timeout;
--        struct timeval *timeout_pointer;
--        int result;
--        io_handler j;
--        io_handler *k;
--
--        /* TODO: loop until the entire interval is expired*/
--        if (until){
--                when current=now();
--                /* a deadline that already passed means "poll once", not a
--                 * wrapped-around, practically endless wait */
--                when interval=(until>current)?(until-current):0;
--
--                /* tv_sec comes from the bits above bit 32, so the low 32
--                 * bits are a binary fraction of a second; scale that
--                 * fraction to microseconds (assumes `when' is a 64-bit
--                 * integer type - confirm against pqtimer.h) */
--                timeout.tv_sec=(interval>>32);
--                timeout.tv_usec=(((unsigned long long)interval&0xffffffffULL)
--                                 *1000000)>>32;
--                timeout_pointer=&timeout;
--        } else timeout_pointer=0;
--
--        FD_ZERO(&fds[0]);
--        FD_ZERO(&fds[1]);
--        FD_ZERO(&fds[2]);
--        for (k=&io_handlers;*k;){
--                if ((*k)->disabled){
--                        j=*k;
--                        *k=j->next;
--                        /* keep the successor's back-pointer valid, the way
--                         * register_io_handler() maintains it */
--                        if (*k) (*k)->last=k;
--                        free(j);
--                        /* look at the new *k again: it may be disabled too */
--                        continue;
--                }
--                set_flag(*k,fds);
--                k=&(*k)->next;
--        }
--
--        result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer);
--
--        if (result > 0)
--                for (j=io_handlers;j;j=j->next){
--                        if (!(j->disabled) &&
--                            ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) ||
--                             (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) ||
--                             (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){
--                                /* a zero return asks for deregistration */
--                                if (!(*j->function)(j->argument))
--                                        j->disabled=1;
--                        }
--                }
--}
--
--/* Function: init_unix_timer()
-- *   is called to initialize the library: it empties the io handler
-- *   list, records the current time in beginning_of_epoch, and passes
-- *   select_timer_block to initialize_timer().
-- */
--void init_unix_timer(void)
--{
--        io_handlers=0;
--        gettimeofday(&beginning_of_epoch, 0);
--        initialize_timer(select_timer_block);
--}
+++ /dev/null
--if LIBLUSTRE
-if !CRAY_PORTALS
--noinst_LIBRARIES = libtcpnal.a
--endif
-
- noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h \
- ipmap.h bridge.h procbridge.h
-
- libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \
- dispatch.h table.h timer.h address.c procapi.c proclib.c \
- connection.c tcpnal.c connection.h
-endif
--
-noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
--libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
--libtcpnal_a_CFLAGS = $(LLCFLAGS)
+++ /dev/null
--This library implements two NAL interfaces, both running over IP.
--The first, tcpnal, creates TCP connections between participating
--processes in order to transport the portals requests. The second,
--ernal, provides a simple transport protocol which runs over
--UDP datagrams.
--
--The interface functions return both of these values in host order for
--convenience and readability. However this means that addresses
--exchanged in messages between hosts of different orderings will not
--function properly.
--
--Both NALs use the same support functions in order to schedule events
--and communicate with the generic portals implementation.
--
-- -------------------------
-- | api |
-- |_______________________|
-- | lib |
-- |_______________________|
-- | ernal | |tcpnal |
-- |--------| |----------|
-- | udpsock| |connection|
-- |-----------------------|
-- | timer/select |
-- -------------------------
--
--
-- These NALs use the framework from fdnal of a pipe between the api
--and library sides. This is wrapped up in the select on the library
--side, and blocks on the api side. Performance could be significantly
--improved by collapsing this artificial barrier, by using shared
--memory queues, or by wiring the api layer directly to the library.
--
--
--nid is defined as the low order 24-bits of the IP address of the
--physical node left shifted by 8 plus a virtual node number of 0
--through 255 (really only 239). The virtual node number of a tcpnal
--application should be specified using the environment variable
--PTL_VIRTNODE. pid is now a completely arbitrary number in the
--range of 0 to 255. The IP interface used can be overridden by
--specifying the appropriate hostid by setting the PTL_HOSTID
--environment variable. The value can be either dotted decimal
--(n.n.n.n) or hex starting with "0x".
--TCPNAL:
-- As the NAL needs to try to send to a particular nid/pid pair, it
-- will open up connections on demand. Because the port associated with
-- the connecting socket is different from the bound port, two
-- connections will normally be established between a pair of peers, with
-- data flowing from the anonymous connect (active) port to the advertised
-- or well-known bound (passive) port of each peer.
--
-- Should the connection fail to open, an error is reported to the
-- library component, which causes the api request to fail.
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* address.c:
-- * this file provides functions to acquire the IP address of the node
-- * and translate it into a NID/PID pair which supports a static
-- * mapping of virtual nodes into the port range of an IP socket.
--*/
--
--#include <stdlib.h>
--#include <netdb.h>
--#include <unistd.h>
--#include <stdio.h>
--#include <portals/p30.h>
--#include <bridge.h>
--#include <ipmap.h>
--
--
--/* Function: get_node_id
-- * Returns: a 32 bit id for this node: its IPv4 address in host byte
-- *          order, or 0 when no address could be determined
-- *
-- * When PTL_HOSTID is not set, get_node_id() determines the host name
-- * and uses the resolver to find out its ip address.  This is fairly
-- * fragile and inflexible, but explicitly asking about interfaces and
-- * their addresses is very complicated and nonportable.
-- *
-- * When PTL_HOSTID is set it overrides the lookup; the value is either
-- * dotted decimal (n.n.n.n) or a number whose second character is 'x'
-- * (e.g. "0x0a000001").
-- */
--static unsigned int get_node_id(void)
--{
--        char buffer[255];
--        unsigned int x = 0;
--        struct hostent *he;
--        char *host_envp = getenv("PTL_HOSTID");
--
--        if (!host_envp)
--        {
--                if (gethostname(buffer,sizeof(buffer)) != 0)
--                        return(0);
--                /* a truncated name is not guaranteed to be terminated */
--                buffer[sizeof(buffer)-1] = '\0';
--                he=gethostbyname(buffer);
--                if (he && he->h_addr_list[0])
--                        x=*(unsigned int *)he->h_addr_list[0];
--                return(ntohl(x));
--        }
--
--        /* test [0] first so an empty string is never indexed past its end */
--        if (host_envp[0] != '\0' && host_envp[1] == 'x')
--        {
--                long long hostid = strtoll(host_envp, 0, 0);
--                return((unsigned int) hostid);
--        }
--        else
--        {
--                unsigned int a, b, c, d;
--                /* reject anything that is not four numeric fields rather
--                 * than building an id from uninitialized values */
--                if (sscanf(host_envp, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
--                {
--                        fprintf(stderr, "PTL_HOSTID \"%s\" is not a valid address\n",
--                                host_envp);
--                        return(0);
--                }
--                return ((a<<24) | (b<<16) | (c<<8) | d);
--        }
--}
--
--
--/* Function: set_address
-- * Arguments: t: a procnal structure to populate with the request
-- *
-- * set_address performs the bit manipulations to set the nid, pid, and
-- * iptop8 fields of the procnal structures.
-- *
-- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
-- */
--
--#ifdef DIRECT_IP_MODE
--void set_address(bridge t,ptl_pid_t pidrequest)
--{
-- int port;
-- if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
-- else port=pidrequest;
- t->nal_cb->ni.nid=get_node_id();
- t->nal_cb->ni.pid=port;
- t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
- t->lib_nal->libnal_ni.ni_pid.pid=port;
--}
--#else
--
--void set_address(bridge t,ptl_pid_t pidrequest)
--{
-- int virtnode, in_addr, port;
-- ptl_pid_t pid;
--
-- /* get and remember my node id*/
-- if (!getenv("PTL_VIRTNODE"))
-- virtnode = 0;
-- else
-- {
-- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
-- >> PNAL_VNODE_SHIFT);
-- virtnode = atoi(getenv("PTL_VIRTNODE"));
-- if (virtnode > maxvnode)
-- {
-- fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
-- virtnode, maxvnode);
-- return;
-- }
-- }
--
-- in_addr = get_node_id();
--
-- t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-
- t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
-- pid=pidrequest;
-- /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
--#ifdef notyet
-- if (pid==(unsigned short)PTL_PID_ANY) port = 0;
--#endif
-- if (pid==(unsigned short)PTL_PID_ANY)
-- {
-- fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
-- return;
-- }
-- else if (pid > PNAL_PID_MASK)
-- {
-- fprintf(stderr, "portal pid of %d is too large - max %d\n",
-- pid, PNAL_PID_MASK);
-- return;
-- }
-- else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- t->nal_cb->ni.pid=pid;
- t->lib_nal->libnal_ni.ni_pid.pid=pid;
--}
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#ifndef TCPNAL_PROCBRIDGE_H
--#define TCPNAL_PROCBRIDGE_H
--
--#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#define PTL_IFACE_TCP 1
-#define PTL_IFACE_ER 2
-#define PTL_IFACE_SS 3
-#define PTL_IFACE_MAX 4
--
--typedef struct bridge {
-- int alive;
- nal_cb_t *nal_cb;
- lib_nal_t *lib_nal;
-- void *lower;
-- void *local;
-- void (*shutdown)(struct bridge *);
-- /* this doesn't really belong here */
-- unsigned char iptop8;
--} *bridge;
-
--
- nal_t *bridge_init(ptl_interface_t nal,
- ptl_pid_t pid_request,
- ptl_ni_limits_t *desired,
- ptl_ni_limits_t *actual,
- int *rc);
--
--typedef int (*nal_initialize)(bridge);
--extern nal_initialize nal_table[PTL_IFACE_MAX];
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* connection.c:
-- This file provides a simple stateful connection manager which
-- builds tcp connections on demand and leaves them open for
-- future use. It also provides the machinery to allow peers
-- to connect to it
--*/
--
--#include <stdlib.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--#include <table.h>
--#include <stdio.h>
--#include <stdarg.h>
--#include <string.h>
--#include <unistd.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netinet/in.h>
--#include <netinet/tcp.h>
--#include <portals/types.h>
--#include <portals/list.h>
--#include <portals/lib-types.h>
--#include <portals/socknal.h>
--#include <linux/kp30.h>
--#include <connection.h>
--#include <pthread.h>
--#include <errno.h>
--#ifndef __CYGWIN__
--#include <syscall.h>
--#endif
--
--/* global variable: acceptor port */
--unsigned short tcpnal_acceptor_port = 988;
--
--
--/* Function: compare_connection
-- * Arguments: arg1: a connection stored in the hash table
-- *            arg2: the id to verify against, an array of unsigned int
-- *                  whose first element is the ip
-- * Returns: 1 if the connection is the one requested, 0 otherwise
-- *
-- * compare_connection() tests for collisions in the hash table
-- */
--static int compare_connection(void *arg1, void *arg2)
--{
--        connection candidate = arg1;
--        unsigned int *wanted = arg2;
--#if 0
--        return((candidate->ip==wanted[0]) && (candidate->port==wanted[1]));
--#else
--        /* CFS specific hacking: only the ip takes part in the match */
--        if (candidate->ip == wanted[0])
--                return 1;
--        return 0;
--#endif
--}
--
--
--/* Function: connection_key
-- * Arguments: id: the id to hash, an array of unsigned int whose
-- *                first element is the ip
-- * Returns: a not-particularly-well-distributed hash
-- *          of the id (currently just the ip itself)
-- */
--static unsigned int connection_key(unsigned int *id)
--{
--#if 0
--        return(id[0]^id[1]);
--#else
--        /* CFS specific hacking */
--        unsigned int key = id[0];
--
--        return key;
--#endif
--}
--
--
--/* Function: remove_connection
-- * Arguments: arg: the connection to remove
-- *
-- * Takes the connection out of its manager's hash table, closes its
-- * socket and frees the structure.
-- */
--void remove_connection(void *arg)
--{
--        connection doomed = arg;
--        unsigned int key[2] = { doomed->ip, doomed->port };
--
--        hash_table_remove(doomed->m->connections, key);
--        close(doomed->fd);
--        free(doomed);
--}
--
--
--/* Function: read_connection:
-- * Arguments: c:    the connection to read from
-- *            dest: the buffer to read into
-- *            len:  the number of bytes to read
-- * Returns: success as 1, or failure as 0
-- *
-- * read_connection() reads data from the connection, continuing
-- * to read partial results until the request is satisfied or
-- * it errors.  On end of stream or on an error other than EINTR the
-- * connection is removed (closed and freed) before 0 is returned, so
-- * the caller must not touch c afterwards.
-- * TODO: this read should be covered by signal protection.
-- */
--int read_connection(connection c,
--                    unsigned char *dest,
--                    int len)
--{
--        int offset = 0,rc;
--
--        while (offset < len) {
--#ifndef __CYGWIN__
--                rc = syscall(SYS_read, c->fd, dest+offset, len-offset);
--#else
--                rc = recv(c->fd, dest+offset, len-offset, 0);
--#endif
--                if (rc > 0) {
--                        offset += rc;
--                        continue;
--                }
--                /* errno only describes this call when it failed (rc < 0);
--                 * on end of stream (rc == 0) a stale EINTR must not make
--                 * us retry forever on a closed peer */
--                if (rc < 0 && errno == EINTR)
--                        continue;
--
--                remove_connection(c);
--                return (0);
--        }
--        return (1);
--}
--
--/* io handler for an established connection: forwards the connection
-- * to the input callback registered with its manager and hands that
-- * callback's result back to the dispatcher.
-- */
--static int connection_input(void *d)
--{
--        connection c = d;
--        manager owner = c->m;
--
--        return owner->handler(owner->handler_arg, c);
--}
--
--
--/* Function: allocate_connection
-- * Arguments: m:    the manager the allocation is occurring in the context of
-- *            ip:   ip of the peer, used as the hash table key
-- *            port: port recorded for the peer
-- *            fd:   open file descriptor for the socket
-- * Returns: an allocated connection structure, or NULL when memory is
-- *          exhausted (fd is left open and still belongs to the caller)
-- *
-- * just encompasses the action common to active and passive
-- * connections of allocation and placement in the global table
-- */
--static connection allocate_connection(manager m,
--                                      unsigned int ip,
--                                      unsigned short port,
--                                      int fd)
--{
--        connection c=malloc(sizeof(struct connection));
--        unsigned int id[2];
--
--        if (c == NULL)
--                return(NULL);
--
--        c->m=m;
--        c->fd=fd;
--        c->ip=ip;
--        c->port=port;
--        id[0]=ip;
--        id[1]=port;
--        register_io_handler(fd,READ_HANDLER,connection_input,c);
--        hash_table_insert(m->connections,c,id);
--        return(c);
--}
--
--
--/* Function: new_connection
-- * Arguments: z: opaque argument holding the manager
-- * Returns: 1 in order to reregister for new connection requests
-- *
-- * called when the bound service socket receives
-- * a new connection request; it accepts the request and
-- * installs a new connection keyed by the peer's ip
-- */
--static int new_connection(void *z)
--{
--        manager m=z;
--        struct sockaddr_in s;
--        socklen_t len=sizeof(struct sockaddr_in);
--        int fd=accept(m->bound,(struct sockaddr *)&s,&len);
--        unsigned int nid;
--
--        if (fd < 0) {
--                /* nothing was accepted: do not register a bogus
--                 * descriptor, but keep listening for further requests */
--                perror("tcpnal accept");
--                return(1);
--        }
--
--        nid=*((unsigned int *)&s.sin_addr);
--        /* cfs specific hack */
--        //unsigned short pid=s.sin_port;
--        pthread_mutex_lock(&m->conn_lock);
--        /* sin_addr is in network order; connections are stored with a
--         * host-order ip (force_tcp_connection() applies htonl() to it) */
--        if (!allocate_connection(m,ntohl(nid),0/*pid*/,fd))
--                close(fd);
--        pthread_mutex_unlock(&m->conn_lock);
--        return(1);
--}
-
- /* FIXME assuming little endian, cleanup!! */
- #define __cpu_to_le64(x) ((__u64)(x))
- #define __le64_to_cpu(x) ((__u64)(x))
- #define __cpu_to_le32(x) ((__u32)(x))
- #define __le32_to_cpu(x) ((__u32)(x))
- #define __cpu_to_le16(x) ((__u16)(x))
- #define __le16_to_cpu(x) ((__u16)(x))
--
--extern ptl_nid_t tcpnal_mynid;
--
--int
--tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
--{
-- int rc;
- int nob;
-- ptl_hdr_t hdr;
-- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
--
-- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
--
-- memset (&hdr, 0, sizeof (hdr));
- hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC);
- hmv->version_major = __cpu_to_le32 (PORTALS_PROTO_VERSION_MAJOR);
- hmv->version_minor = __cpu_to_le32 (PORTALS_PROTO_VERSION_MINOR);
- hmv->magic = cpu_to_le32(PORTALS_PROTO_MAGIC);
- hmv->version_major = cpu_to_le32(PORTALS_PROTO_VERSION_MAJOR);
- hmv->version_minor = cpu_to_le32(PORTALS_PROTO_VERSION_MINOR);
--
- hdr.src_nid = __cpu_to_le64 (tcpnal_mynid);
- hdr.type = __cpu_to_le32 (PTL_MSG_HELLO);
- hdr.src_nid = cpu_to_le64(tcpnal_mynid);
- hdr.type = cpu_to_le32(PTL_MSG_HELLO);
--
- hdr.msg.hello.type = __cpu_to_le32 (type);
- hdr.msg.hello.incarnation = 0;
- hdr.msg.hello.type = cpu_to_le32(type);
- hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
-
- /* I don't send any interface info */
--
-- /* Assume sufficient socket buffering for this message */
-- rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
-- if (rc <= 0) {
-- CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
-- return (rc);
-- }
--
-- rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
-- if (rc <= 0) {
-- CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
-- return (rc);
-- }
--
- if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) {
- if (hmv->magic != le32_to_cpu(PORTALS_PROTO_MAGIC)) {
-- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
- __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid);
- cpu_to_le32(hmv->magic), PORTALS_PROTO_MAGIC, *nid);
-- return (-EPROTO);
-- }
--
- if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
- hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
- if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
- hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
-- CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
-- " from "LPX64"\n",
- __le16_to_cpu (hmv->version_major),
- __le16_to_cpu (hmv->version_minor),
- le16_to_cpu (hmv->version_major),
- le16_to_cpu (hmv->version_minor),
-- PORTALS_PROTO_VERSION_MAJOR,
-- PORTALS_PROTO_VERSION_MINOR,
-- *nid);
-- return (-EPROTO);
-- }
--
- #if (PORTALS_PROTO_VERSION_MAJOR != 0)
- # error "This code only understands protocol version 0.x"
-#if (PORTALS_PROTO_VERSION_MAJOR != 1)
-# error "This code only understands protocol version 1.x"
--#endif
- /* version 0 sends magic/version as the dest_nid of a 'hello' header,
- /* version 1 sends magic/version as the dest_nid of a 'hello' header,
-- * so read the rest of it in now... */
--
-- rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
-- if (rc <= 0) {
-- CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
-- rc, *nid);
-- return (rc);
-- }
--
-- /* ...and check we got what we expected */
- if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) ||
- hdr.payload_length != __cpu_to_le32 (0)) {
- CERROR ("Expecting a HELLO hdr with 0 payload,"
- if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
- CERROR ("Expecting a HELLO hdr "
-- " but got type %d with %d payload from "LPX64"\n",
- __le32_to_cpu (hdr.type),
- __le32_to_cpu (hdr.payload_length), *nid);
- le32_to_cpu (hdr.type),
- le32_to_cpu (hdr.payload_length), *nid);
-- return (-EPROTO);
-- }
--
- if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
- if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
-- CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
-- return (-EPROTO);
-- }
--
-- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
- *nid = __le64_to_cpu(hdr.src_nid);
- } else if (*nid != __le64_to_cpu (hdr.src_nid)) {
- *nid = le64_to_cpu(hdr.src_nid);
- } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-- CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
- __le64_to_cpu (hdr.src_nid), *nid);
- le64_to_cpu (hdr.src_nid), *nid);
- return (-EPROTO);
- }
-
- /* Ignore any interface info in the payload */
- nob = le32_to_cpu(hdr.payload_length);
- if (nob > getpagesize()) {
- CERROR("Unexpected HELLO payload %d from "LPX64"\n",
- nob, *nid);
-- return (-EPROTO);
- }
- if (nob > 0) {
- char *space = (char *)malloc(nob);
-
- if (space == NULL) {
- CERROR("Can't allocate scratch buffer %d\n", nob);
- return (-ENOMEM);
- }
-
- rc = syscall(SYS_read, sockfd, space, nob);
- if (rc <= 0) {
- CERROR("Error %d skipping HELLO payload from "
- LPX64"\n", rc, *nid);
- return (rc);
- }
-- }
--
-- return (0);
--}
--
--/* Function: force_tcp_connection
-- * Arguments: t: tcpnal
-- * dest: portals endpoint for the connection
-- * Returns: an allocated connection structure, either
-- * a pre-existing one, or a new connection
-- */
--connection force_tcp_connection(manager m,
-- unsigned int ip,
-- unsigned short port,
-- procbridge pb)
--{
-- connection conn;
-- struct sockaddr_in addr;
- struct sockaddr_in locaddr;
-- unsigned int id[2];
-- struct timeval tv;
-- __u64 incarnation;
--
- int fd;
- int option;
- int rc;
- int rport;
- ptl_nid_t peernid = PTL_NID_ANY;
- port = tcpnal_acceptor_port;
--
-- id[0] = ip;
-- id[1] = port;
--
-- pthread_mutex_lock(&m->conn_lock);
--
-- conn = hash_table_find(m->connections, id);
- if (conn)
- goto out;
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
- if (!conn) {
- int fd;
- int option;
- ptl_nid_t peernid = PTL_NID_ANY;
--
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_addr.s_addr = INADDR_ANY;
- bzero((char *) &addr, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
--
- for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- perror("tcpnal socket failed");
- goto out;
- }
-
- option = 1;
- rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
- &option, sizeof(option));
- if (rc != 0) {
- perror ("Can't set SO_REUSEADDR for socket");
- close(fd);
- goto out;
- }
- if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
- perror("tcpnal socket failed");
- exit(-1);
- }
- if (connect(fd, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in))) {
- perror("tcpnal connect");
- return(0);
- }
--
- locaddr.sin_port = htons(rport);
- rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc == 0 || errno == EACCES) {
- rc = connect(fd, (struct sockaddr *)&addr,
- sizeof(struct sockaddr_in));
- if (rc == 0) {
- break;
- } else if (errno != EADDRINUSE) {
- perror("Error connecting to remote host");
- close(fd);
- goto out;
- }
- } else if (errno != EADDRINUSE) {
- perror("Error binding to privileged port");
- close(fd);
- goto out;
- }
- close(fd);
- }
-
- if (rport == IPPORT_RESERVED / 2) {
- fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
- goto out;
- }
-
--#if 1
- option = 1;
- setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
- option = 1;
- setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- option = 1<<20;
- setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
--#endif
--
- gettimeofday(&tv, NULL);
- incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- gettimeofday(&tv, NULL);
- incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
--
- /* say hello */
- if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
- /* say hello */
- if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
-- exit(-1);
-
- conn = allocate_connection(m, ip, port, fd);
-
- /* let nal thread know this event right away */
- if (conn)
- procbridge_wakeup_nal(pb);
--
- out:
- conn = allocate_connection(m, ip, port, fd);
-
- /* let nal thread know this event right away */
- if (conn)
- procbridge_wakeup_nal(pb);
- }
-
-- pthread_mutex_unlock(&m->conn_lock);
-- return (conn);
--}
-
--
--/* Function: bind_socket
-- * Arguments: m: the manager state for this interface
-- *            port: the port to attempt to bind to
-- * Returns: 1 on success, or 0 on error
-- *
-- * bind_socket() attempts to allocate and bind a socket to the requested
-- * port, or dynamically assign one from the kernel should the port be
-- * zero.  On success it sets the bound, bound_handler and port elements
-- * of m; port is stored exactly as getsockname() reports it, i.e. in
-- * network byte order.  On failure the socket is closed again.
-- *
-- * TODO: The port should be an explicitly sized type.
-- */
--static int bind_socket(manager m,unsigned short port)
--{
--        struct sockaddr_in addr;
--        socklen_t alen=sizeof(struct sockaddr_in);
--
--        if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0)
--                return(0);
--
--        bzero((char *) &addr, sizeof(addr));
--        addr.sin_family = AF_INET;
--        addr.sin_addr.s_addr = 0;
--        addr.sin_port = htons(port);
--
--        if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){
--                perror ("tcpnal bind");
--                goto fail;
--        }
--
--        if (getsockname(m->bound,(struct sockaddr *)&addr, &alen)<0){
--                perror ("tcpnal getsockname");
--                goto fail;
--        }
--
--        /* start listening before the descriptor is handed to select() */
--        if (listen(m->bound,5)<0){
--                perror ("tcpnal listen");
--                goto fail;
--        }
--
--        m->bound_handler=register_io_handler(m->bound,READ_HANDLER,
--                                             new_connection,m);
--        m->port=addr.sin_port;
--        return(1);
--
--fail:
--        /* do not leak the descriptor when setup fails part way */
--        close(m->bound);
--        return(0);
--}
--
--
--/* Function: shutdown_connections
-- * Arguments: m: the manager structure
-- *
-- * Closes the listening socket, disables its io handler, removes every
-- * connection in the table and finally frees the manager itself.
-- */
--void shutdown_connections(manager m)
--{
--        int listener = m->bound;
--        io_handler acceptor = m->bound_handler;
--
--        close(listener);
--        remove_io_handler(acceptor);
--        hash_destroy_table(m->connections, remove_connection);
--        free(m);
--}
--
--
--/* Function: init_connections
-- * Arguments: pid:   the port to attempt to bind to
-- *            input: callback run when an established connection has
-- *                   input; it receives a and the connection
-- *            a:     opaque first argument for input
-- * Returns: a newly allocated manager structure, or
-- *          zero if memory ran out or the port could not be bound
-- */
--manager init_connections(unsigned short pid,
--                         int (*input)(void *, void *),
--                         void *a)
--{
--        manager m = (manager)malloc(sizeof(struct manager));
--
--        if (m == NULL)
--                return(0);
--
--        m->connections = hash_create_table(compare_connection,connection_key);
--        m->handler = input;
--        m->handler_arg = a;
--        pthread_mutex_init(&m->conn_lock, 0);
--
--        if (bind_socket(m,pid))
--                return(m);
--
--        /* undo everything set up above, mirroring shutdown_connections() */
--        pthread_mutex_destroy(&m->conn_lock);
--        hash_destroy_table(m->connections,remove_connection);
--        free(m);
--        return(0);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#include <table.h>
--#include <procbridge.h>
--
--typedef struct manager {
-- table connections;
-- pthread_mutex_t conn_lock; /* protect connections table */
-- int bound;
-- io_handler bound_handler;
-- int (*handler)(void *, void *);
-- void *handler_arg;
-- unsigned short port;
--} *manager;
--
--
--typedef struct connection {
-- unsigned int ip;
-- unsigned short port;
-- int fd;
-- manager m;
--} *connection;
--
--connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
-- procbridge pb);
--manager init_connections(unsigned short, int (*f)(void *, void *), void *);
--void remove_connection(void *arg);
--void shutdown_connections(manager m);
--int read_connection(connection c, unsigned char *dest, int len);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2002 Cluster File Systems, Inc.
-- * Author: Phil Schwan <phil@clusterfs.com>
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include <stdio.h>
--#include <fcntl.h>
--#include <errno.h>
--#include <stdarg.h>
--#include <sys/time.h>
--
--int smp_processor_id = 1;
--char debug_file_path[1024] = "/tmp/lustre-log";
--char debug_file_name[1024];
--FILE *debug_file_fd;
--
--int portals_do_debug_dumplog(void *arg)
--{
-- printf("Look in %s\n", debug_file_name);
-- return 0;
--}
--
--
--void portals_debug_print(void)
--{
-- return;
--}
--
--
--void portals_debug_dumplog(void)
--{
-- printf("Look in %s\n", debug_file_name);
-- return;
--}
--
--
--int portals_debug_init(unsigned long bufsize)
--{
-- debug_file_fd = stdout;
-- return 0;
--}
--
--int portals_debug_cleanup(void)
--{
-- return 0; //close(portals_debug_fd);
--}
--
--int portals_debug_clear_buffer(void)
--{
-- return 0;
--}
--
--int portals_debug_mark_buffer(char *text)
--{
--
-- fprintf(debug_file_fd, "*******************************************************************************\n");
-- fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
-- fprintf(debug_file_fd, "*******************************************************************************\n");
--
-- return 0;
--}
--
--int portals_debug_copy_to_user(char *buf, unsigned long len)
--{
-- return 0;
--}
--
--/* Userspace stand-in for the debug message call: writes one message to
-- * debug_file_fd (stdout when portals_debug_init() has not run yet).
-- *
-- * The line starts with "subsys:mask:cpu:sec.usec (file:line:fn() 0+N): "
-- * where N is derived from the address of a local variable, followed by
-- * the caller's printf-style format expanded with its arguments.
-- */
--void
--portals_debug_msg (int subsys, int mask, char *file, const char *fn,
--                   const int line, const char *format, ...)
--{
--        va_list ap;
--        unsigned long flags; /* only its address is used, as a stack marker */
--        struct timeval tv;
--        FILE *out = debug_file_fd ? debug_file_fd : stdout;
--
--        gettimeofday(&tv, NULL);
--
--        fprintf(out,
--                "%02x:%06x:%d:%lu.%06lu ",
--                subsys >> 24, mask, smp_processor_id,
--                (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
--
--        fprintf(out,
--                "(%s:%d:%s() %d+%ld): ",
--                file, line, fn, 0,
--                (long)(8192 - ((unsigned long)&flags & 8191UL)));
--
--        /* a va_list has to go through vfprintf(); handing it to fprintf()
--         * as if it were the first variadic argument is undefined */
--        va_start (ap, format);
--        vfprintf(out, format, ap);
--        va_end (ap);
--}
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--/* this file is only called dispatch.h to prevent it
-- from colliding with /usr/include/sys/select.h */
--
--typedef struct io_handler *io_handler;
--
--struct io_handler{
-- io_handler *last;
-- io_handler next;
-- int fd;
-- int type;
-- int (*function)(void *);
-- void *argument;
-- int disabled;
--};
--
--
--#define READ_HANDLER 1
--#define WRITE_HANDLER 2
--#define EXCEPTION_HANDLER 4
--#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER)
--
--io_handler register_io_handler(int fd,
-- int type,
-- int (*function)(void *),
-- void *arg);
--
--void remove_io_handler (io_handler i);
--void init_unix_timer(void);
--void select_timer_block(when until);
--when now(void);
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#define DIRECT_IP_MODE
--#ifdef DIRECT_IP_MODE
--#define PNAL_NID(in_addr, port) (in_addr)
--#define PNAL_PID(pid) (pid)
--#define PNAL_IP(in_addr, port) (in_addr)
--#define PNAL_PORT(nid, pid) (pid)
--#else
--
--#define PNAL_BASE_PORT 4096
--#define PNAL_HOSTID_SHIFT 24
--#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
--#define PNAL_VNODE_SHIFT 8
--#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
--#define PNAL_PID_SHIFT 8
--#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
--
--#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
-- << PNAL_VNODE_SHIFT) \
-- | (((ntohs(port)-PNAL_BASE_PORT) >>\
-- PNAL_PID_SHIFT)))
--#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
--
--#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
-- >> PNAL_VNODE_SHIFT)\
-- | (t->iptop8 << PNAL_HOSTID_SHIFT)))
--#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
-- << PNAL_VNODE_SHIFT) \
-- | ((pid) & PNAL_PID_MASK)) \
-- + PNAL_BASE_PORT))
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* timer.c:
-- * this file implements a simple priority-queue based timer system. when
-- * combined with a file which implements now() and block(), it can
-- * be used to provide coarse-grained time-based callbacks.
-- */
--
--#include <pqtimer.h>
--#include <stdlib.h>
--#include <string.h>
--
--struct timer {
-- void (*function)(void *);
-- void *arg;
-- when w;
-- int interval;
-- int disable;
--};
--
--typedef struct thunk *thunk;
--struct thunk {
-- void (*f)(void *);
-- void *a;
-- thunk next;
--};
--
--extern when now(void);
--
--static thunk thunks;
--static int internal;
--static void (*block_function)(when);
--static int number_of_timers;
--static int size_of_pqueue;
--static timer *timers;
--
--
--static void heal(int where)
--{
-- int left=(where<<1);
-- int right=(where<<1)+1;
-- int min=where;
-- timer temp;
--
-- if (left <= number_of_timers)
-- if (timers[left]->w < timers[min]->w) min=left;
-- if (right <= number_of_timers)
-- if (timers[right]->w < timers[min]->w) min=right;
-- if (min != where){
-- temp=timers[where];
-- timers[where]=timers[min];
-- timers[min]=temp;
-- heal(min);
-- }
--}
--
--static void add_pqueue(int i)
--{
-- timer temp;
-- int parent=(i>>1);
-- if ((i>1) && (timers[i]->w< timers[parent]->w)){
-- temp=timers[i];
-- timers[i]=timers[parent];
-- timers[parent]=temp;
-- add_pqueue(parent);
-- }
--}
--
--static void add_timer(timer t)
--{
-- if (size_of_pqueue<(number_of_timers+2)){
-- int oldsize=size_of_pqueue;
-- timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10));
-- memcpy(new,timers,sizeof(timer)*oldsize);
-- timers=new;
-- }
-- timers[++number_of_timers]=t;
-- add_pqueue(number_of_timers);
--}
--
--/* Function: register_timer
-- * Arguments: interval: the time interval from the current time when
-- * the timer function should be called
-- * function: the function to call when the time has expired
-- * argument: the argument to call it with.
-- * Returns: a pointer to a timer structure
-- */
--timer register_timer(when interval,
-- void (*function)(void *),
-- void *argument)
--{
-- timer t=(timer)malloc(sizeof(struct timer));
--
-- t->arg=argument;
-- t->function=function;
-- t->interval=interval;
-- t->disable=0;
-- t->w=now()+interval;
-- add_timer(t);
-- if (!internal && (number_of_timers==1))
-- block_function(t->w);
-- return(t);
--}
--
--/* Function: remove_timer
-- * Arguments: t:
-- * Returns: nothing
-- *
-- * remove_timer removes a timer from the system, insuring
-- * that it will never be called. It does not actually
-- * free the timer due to reentrancy issues.
-- */
--
--void remove_timer(timer t)
--{
-- t->disable=1;
--}
--
--
--
--void timer_fire()
--{
-- timer current;
--
-- current=timers[1];
-- timers[1]=timers[number_of_timers--];
-- heal(1);
-- if (!current->disable) {
-- (*current->function)(current->arg);
-- }
-- free(current);
--}
--
--when next_timer(void)
--{
-- when here=now();
--
-- while (number_of_timers && (timers[1]->w <= here)) timer_fire();
-- if (number_of_timers) return(timers[1]->w);
-- return(0);
--}
--
--/* Function: timer_loop
-- * Arguments: none
-- * Returns: never
-- *
-- * timer_loop() is the blocking dispatch function for the timer.
-- * Is calls the block() function registered with init_timer,
-- * and handles associated with timers that have been registered.
-- */
--void timer_loop()
--{
-- when here;
--
-- while (1){
-- thunk z;
-- here=now();
--
-- for (z=thunks;z;z=z->next) (*z->f)(z->a);
--
-- if (number_of_timers){
-- if (timers[1]->w > here){
-- (*block_function)(timers[1]->w);
-- } else {
-- timer_fire();
-- }
-- } else {
-- thunk z;
-- for (z=thunks;z;z=z->next) (*z->f)(z->a);
-- (*block_function)(0);
-- }
-- }
--}
--
--
--/* Function: register_thunk
-- * Arguments: f: the function to call
-- * a: the single argument to call it with
-- *
-- * Thunk functions get called at irregular intervals, they
-- * should not assume when, or take a particularily long
-- * amount of time. Thunks are for background cleanup tasks.
-- */
--void register_thunk(void (*f)(void *),void *a)
--{
-- thunk t=(void *)malloc(sizeof(struct thunk));
-- t->f=f;
-- t->a=a;
-- t->next=thunks;
-- thunks=t;
--}
--
--/* Function: initialize_timer
-- * Arguments: block: the function to call to block for the specified interval
-- *
-- * initialize_timer() must be called before any other timer function,
-- * including timer_loop.
-- */
--void initialize_timer(void (*block)(when))
--{
-- block_function=block;
-- number_of_timers=0;
-- size_of_pqueue=10;
-- timers=(timer *)malloc(sizeof(timer)*size_of_pqueue);
-- thunks=0;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--typedef unsigned long long when;
--when now(void);
--typedef struct timer *timer;
--timer register_timer(when interval,
-- void (*function)(void *),
-- void *argument);
--timer register_timer_wait(void);
--void remove_timer(timer);
--void timer_loop(void);
--void initialize_timer(void (*block)(when));
--void timer_fire(void);
--
--
--#define HZ 0x100000000ull
--
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* api.c:
-- * This file provides the 'api' side for the process-based nals.
-- * it is responsible for creating the 'library' side thread,
-- * and passing wrapped portals transactions to it.
-- *
-- * Along with initialization, shutdown, and transport to the library
-- * side, this file contains some stubs to satisfy the nal definition.
-- */
--#include <stdio.h>
--#include <stdlib.h>
--#include <unistd.h>
--#include <string.h>
--#ifndef __CYGWIN__
--#include <syscall.h>
--#endif
--#include <sys/socket.h>
--#include <procbridge.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--#include <errno.h>
--
--
--/* XXX CFS workaround, to give a chance to let nal thread wake up
-- * from waiting in select
-- */
--static int procbridge_notifier_handler(void *arg)
--{
-- static char buf[8];
-- procbridge p = (procbridge) arg;
--
-- syscall(SYS_read, p->notifier[1], buf, sizeof(buf));
-- return 1;
--}
--
--void procbridge_wakeup_nal(procbridge p)
--{
-- static char buf[8];
-- syscall(SYS_write, p->notifier[0], buf, sizeof(buf));
- }
-
- /* Function: forward
- * Arguments: nal_t *nal: pointer to my top-side nal structure
- * id: the command to pass to the lower layer
- * args, args_len:pointer to and length of the request
- * ret, ret_len: pointer to and size of the result
- * Returns: a portals status code
- *
- * forwards a packaged api call from the 'api' side to the 'library'
- * side, and collects the result
- */
- static int procbridge_forward(nal_t *n, int id, void *args, size_t args_len,
- void *ret, size_t ret_len)
- {
- bridge b = (bridge) n->nal_data;
-
- if (id == PTL_FINI) {
- lib_fini(b->nal_cb);
-
- if (b->shutdown)
- (*b->shutdown)(b);
- }
-
- lib_dispatch(b->nal_cb, NULL, id, args, ret);
-
- return (PTL_OK);
--}
-
--
--/* Function: shutdown
-- * Arguments: nal: a pointer to my top side nal structure
-- * ni: my network interface index
-- *
-- * cleanup nal state, reclaim the lower side thread and
-- * its state using PTL_FINI codepoint
-- */
- static int procbridge_shutdown(nal_t *n, int ni)
-static void procbridge_shutdown(nal_t *n)
--{
- bridge b=(bridge)n->nal_data;
- lib_nal_t *nal = n->nal_data;
- bridge b=(bridge)nal->libnal_data;
-- procbridge p=(procbridge)b->local;
--
-- p->nal_flags |= NAL_FLAG_STOPPING;
-- procbridge_wakeup_nal(p);
--
-- do {
-- pthread_mutex_lock(&p->mutex);
-- if (p->nal_flags & NAL_FLAG_STOPPED) {
-- pthread_mutex_unlock(&p->mutex);
-- break;
-- }
-- pthread_cond_wait(&p->cond, &p->mutex);
-- pthread_mutex_unlock(&p->mutex);
-- } while (1);
--
-- free(p);
- return(0);
- }
-
-
- /* Function: validate
- * useless stub
- */
- static int procbridge_validate(nal_t *nal, void *base, size_t extent)
- {
- return(0);
--}
-
-
- /* FIXME cfs temporary workaround! FIXME
- * global time out value
- */
- int __tcpnal_eqwait_timeout_value = 0;
- int __tcpnal_eqwait_timedout = 0;
-
- /* Function: yield
- * Arguments: pid:
- *
- * this function was originally intended to allow the
- * lower half thread to be scheduled to allow progress. we
- * overload it to explicitly block until signalled by the
- * lower half.
- */
- static void procbridge_yield(nal_t *n)
- {
- bridge b=(bridge)n->nal_data;
- procbridge p=(procbridge)b->local;
-
- pthread_mutex_lock(&p->mutex);
- if (!__tcpnal_eqwait_timeout_value) {
- pthread_cond_wait(&p->cond,&p->mutex);
- } else {
- struct timeval now;
- struct timespec timeout;
-
- gettimeofday(&now, NULL);
- timeout.tv_sec = now.tv_sec + __tcpnal_eqwait_timeout_value;
- timeout.tv_nsec = now.tv_usec * 1000;
--
- __tcpnal_eqwait_timedout =
- pthread_cond_timedwait(&p->cond, &p->mutex, &timeout);
- }
- pthread_mutex_unlock(&p->mutex);
- }
--
-/* forward decl */
-extern int procbridge_startup (nal_t *, ptl_pid_t,
- ptl_ni_limits_t *, ptl_ni_limits_t *);
--
- static void procbridge_lock(nal_t * nal, unsigned long *flags){}
- static void procbridge_unlock(nal_t * nal, unsigned long *flags){}
--/* api_nal
-- * the interface vector to allow the generic code to access
- * this nal. this is seperate from the library side nal_cb.
- * this nal. this is seperate from the library side lib_nal.
-- * TODO: should be dyanmically allocated
-- */
- static nal_t api_nal = {
- ni: {0},
-nal_t procapi_nal = {
-- nal_data: NULL,
- forward: procbridge_forward,
- shutdown: procbridge_shutdown,
- validate: procbridge_validate,
- yield: procbridge_yield,
- lock: procbridge_lock,
- unlock: procbridge_unlock
- nal_ni_init: procbridge_startup,
- nal_ni_fini: procbridge_shutdown,
--};
--
--ptl_nid_t tcpnal_mynid;
--
- /* Function: procbridge_interface
-/* Function: procbridge_startup
-- *
-- * Arguments: pid: requested process id (port offset)
-- * PTL_ID_ANY not supported.
-- * desired: limits passed from the application
-- * and effectively ignored
-- * actual: limits actually allocated and returned
-- *
- * Returns: a pointer to my statically allocated top side NAL
- * structure
- * Returns: portals rc
-- *
-- * initializes the tcp nal. we define unix_failure as an
-- * error wrapper to cut down clutter.
-- */
- nal_t *procbridge_interface(int num_interface,
- ptl_pt_index_t ptl_size,
- ptl_ac_index_t acl_size,
- ptl_pid_t requested_pid)
-int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
- ptl_ni_limits_t *requested_limits,
- ptl_ni_limits_t *actual_limits)
--{
-- nal_init_args_t args;
-
-- procbridge p;
-- bridge b;
- static int initialized=0;
- ptl_ni_limits_t limits = {-1,-1,-1,-1,-1};
- /* XXX nal_type is purely private to tcpnal here */
-- int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */
--
- if(initialized) return (&api_nal);
- LASSERT(nal == &procapi_nal);
--
-- init_unix_timer();
--
-- b=(bridge)malloc(sizeof(struct bridge));
-- p=(procbridge)malloc(sizeof(struct procbridge));
- api_nal.nal_data=b;
-- b->local=p;
-
- if (ptl_size)
- limits.max_ptable_index = ptl_size;
- if (acl_size)
- limits.max_atable_index = acl_size;
--
-- args.nia_requested_pid = requested_pid;
- args.nia_limits = &limits;
- args.nia_requested_limits = requested_limits;
- args.nia_actual_limits = actual_limits;
-- args.nia_nal_type = nal_type;
-- args.nia_bridge = b;
- args.nia_apinal = nal;
--
-- /* init procbridge */
-- pthread_mutex_init(&p->mutex,0);
-- pthread_cond_init(&p->cond, 0);
-- p->nal_flags = 0;
- pthread_mutex_init(&p->nal_cb_lock, 0);
--
-- /* initialize notifier */
-- if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
-- perror("socketpair failed");
- return NULL;
- return PTL_FAIL;
-- }
--
-- if (!register_io_handler(p->notifier[1], READ_HANDLER,
-- procbridge_notifier_handler, p)) {
-- perror("fail to register notifier handler");
- return NULL;
- return PTL_FAIL;
-- }
--
-- /* create nal thread */
-- if (pthread_create(&p->t, NULL, nal_thread, &args)) {
-- perror("nal_init: pthread_create");
- return(NULL);
- return PTL_FAIL;
-- }
--
-- do {
-- pthread_mutex_lock(&p->mutex);
-- if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) {
-- pthread_mutex_unlock(&p->mutex);
-- break;
-- }
-- pthread_cond_wait(&p->cond, &p->mutex);
-- pthread_mutex_unlock(&p->mutex);
-- } while (1);
--
-- if (p->nal_flags & NAL_FLAG_STOPPED)
- return (NULL);
- return PTL_FAIL;
--
- b->nal_cb->ni.nid = tcpnal_mynid;
- initialized = 1;
- b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
--
- return (&api_nal);
- return PTL_OK;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#ifndef _PROCBRIDGE_H_
--#define _PROCBRIDGE_H_
--
--#include <pthread.h>
--#include <bridge.h>
--#include <ipmap.h>
--
--
--#define NAL_FLAG_RUNNING 1
--#define NAL_FLAG_STOPPING 2
--#define NAL_FLAG_STOPPED 4
--
--typedef struct procbridge {
-- /* sync between user threads and nal thread */
-- pthread_t t;
-- pthread_cond_t cond;
-- pthread_mutex_t mutex;
--
-- /* socket pair used to notify nal thread */
-- int notifier[2];
--
-- int nal_flags;
--
- pthread_mutex_t nal_cb_lock;
--} *procbridge;
--
--typedef struct nal_init_args {
-- ptl_pid_t nia_requested_pid;
- ptl_ni_limits_t *nia_limits;
- ptl_ni_limits_t *nia_requested_limits;
- ptl_ni_limits_t *nia_actual_limits;
-- int nia_nal_type;
-- bridge nia_bridge;
- nal_t *nia_apinal;
--} nal_init_args_t;
--
--extern void *nal_thread(void *);
--
--
--#define PTL_INIT (LIB_MAX_DISPATCH+1)
--#define PTL_FINI (LIB_MAX_DISPATCH+2)
--
--#define MAX_ACLS 1
--#define MAX_PTLS 128
--
--extern void set_address(bridge t,ptl_pid_t pidrequest);
- extern nal_t *procbridge_interface(int num_interface,
- ptl_pt_index_t ptl_size,
- ptl_ac_index_t acl_size,
- ptl_pid_t requested_pid);
--extern void procbridge_wakeup_nal(procbridge p);
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* lib.c:
-- * This file provides the 'library' side for the process-based nals.
-- * it is responsible for communication with the 'api' side and
-- * providing service to the generic portals 'library'
-- * implementation. 'library' might be better termed 'communication'
-- * or 'kernel'.
-- */
--
--#include <stdlib.h>
--#include <stdio.h>
--#include <stdarg.h>
--#include <unistd.h>
--#include <procbridge.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netdb.h>
--#include <errno.h>
--#include <timer.h>
--#include <dispatch.h>
--
--/* the following functions are stubs to satisfy the nal definition
-- without doing anything particularily useful*/
-
- static ptl_err_t nal_write(nal_cb_t *nal,
- void *private,
- user_ptr dst_addr,
- void *src_addr,
- size_t len)
- {
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
- }
-
- static ptl_err_t nal_read(nal_cb_t * nal,
- void *private,
- void *dst_addr,
- user_ptr src_addr,
- size_t len)
- {
- memcpy(dst_addr, src_addr, len);
- return PTL_OK;
- }
-
- static void *nal_malloc(nal_cb_t *nal,
- size_t len)
- {
- void *buf = malloc(len);
- return buf;
- }
-
- static void nal_free(nal_cb_t *nal,
- void *buf,
- size_t len)
- {
- free(buf);
- }
-
- static void nal_printf(nal_cb_t *nal,
- const char *fmt,
- ...)
- {
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- va_end(ap);
- }
-
-
- static void nal_cli(nal_cb_t *nal,
- unsigned long *flags)
- {
- bridge b = (bridge) nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_lock(&p->nal_cb_lock);
- }
-
-
- static void nal_sti(nal_cb_t *nal,
- unsigned long *flags)
- {
- bridge b = (bridge)nal->nal_data;
- procbridge p = (procbridge) b->local;
-
- pthread_mutex_unlock(&p->nal_cb_lock);
- }
-
--
- static int nal_dist(nal_cb_t *nal,
-static int nal_dist(lib_nal_t *nal,
-- ptl_nid_t nid,
-- unsigned long *dist)
--{
-- return 0;
--}
--
- static void wakeup_topside(void *z)
-static void check_stopping(void *z)
--{
-- bridge b = z;
-- procbridge p = b->local;
- int stop;
--
- if ((p->nal_flags & NAL_FLAG_STOPPING) == 0)
- return;
-
-- pthread_mutex_lock(&p->mutex);
- stop = p->nal_flags & NAL_FLAG_STOPPING;
- if (stop)
- p->nal_flags |= NAL_FLAG_STOPPED;
- p->nal_flags |= NAL_FLAG_STOPPED;
-- pthread_cond_broadcast(&p->cond);
-- pthread_mutex_unlock(&p->mutex);
--
- if (stop)
- pthread_exit(0);
- pthread_exit(0);
--}
--
--
--/* Function: nal_thread
-- * Arguments: z: an opaque reference to a nal control structure
-- * allocated and partially populated by the api level code
-- * Returns: nothing, and only on error or explicit shutdown
-- *
-- * This function is the entry point of the pthread initiated on
-- * the api side of the interface. This thread is used to handle
-- * asynchronous delivery to the application.
-- *
-- * We define a limit macro to place a ceiling on limits
-- * for syntactic convenience
-- */
- #define LIMIT(x,y,max)\
- if ((unsigned int)x > max) y = max;
-
--extern int tcpnal_init(bridge);
--
--nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0};
--
--void *nal_thread(void *z)
--{
-- nal_init_args_t *args = (nal_init_args_t *) z;
-- bridge b = args->nia_bridge;
-- procbridge p=b->local;
-- int rc;
- ptl_pid_t pid_request;
- ptl_process_id_t process_id;
-- int nal_type;
- ptl_ni_limits_t desired;
- ptl_ni_limits_t actual;
--
- b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t));
- b->nal_cb->nal_data=b;
- b->nal_cb->cb_read=nal_read;
- b->nal_cb->cb_write=nal_write;
- b->nal_cb->cb_malloc=nal_malloc;
- b->nal_cb->cb_free=nal_free;
- b->nal_cb->cb_map=NULL;
- b->nal_cb->cb_unmap=NULL;
- b->nal_cb->cb_printf=nal_printf;
- b->nal_cb->cb_cli=nal_cli;
- b->nal_cb->cb_sti=nal_sti;
- b->nal_cb->cb_dist=nal_dist;
- b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
- b->lib_nal->libnal_data=b;
- b->lib_nal->libnal_map=NULL;
- b->lib_nal->libnal_unmap=NULL;
- b->lib_nal->libnal_dist=nal_dist;
--
- pid_request = args->nia_requested_pid;
- desired = *args->nia_limits;
-- nal_type = args->nia_nal_type;
-
- actual = desired;
- LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES);
- LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS);
- LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS);
- LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS);
- LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS);
--
- set_address(b,pid_request);
- /* Wierd, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
- * lib_init() is about to do from the process_id passed to it...*/
- set_address(b,args->nia_requested_pid);
--
- process_id = b->lib_nal->libnal_ni.ni_pid;
-
-- if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
-- /* initialize the generic 'library' level code */
--
- rc = lib_init(b->nal_cb,
- b->nal_cb->ni.nid,
- b->nal_cb->ni.pid,
- 10,
- actual.max_ptable_index,
- actual.max_atable_index);
- rc = lib_init(b->lib_nal, args->nia_apinal,
- process_id,
- args->nia_requested_limits,
- args->nia_actual_limits);
--
-- /*
-- * Whatever the initialization returned is passed back to the
-- * user level code for further interpretation. We just exit if
-- * it is non-zero since something went wrong.
-- */
-- /* this should perform error checking */
-- pthread_mutex_lock(&p->mutex);
- p->nal_flags |= rc ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING;
- p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING;
-- pthread_cond_broadcast(&p->cond);
-- pthread_mutex_unlock(&p->mutex);
--
- if (!rc) {
- if (rc == PTL_OK) {
-- /* the thunk function is called each time the timer loop
-- performs an operation and returns to blocking mode. we
-- overload this function to inform the api side that
-- it may be interested in looking at the event queue */
- register_thunk(wakeup_topside,b);
- register_thunk(check_stopping,b);
-- timer_loop();
-- }
-- return(0);
--}
- #undef LIMIT
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* select.c:
-- * Provides a general mechanism for registering and dispatching
-- * io events through the select system call.
-- */
--
--#ifdef sun
--#include <sys/filio.h>
--#else
--#include <sys/ioctl.h>
--#endif
--
--#include <sys/time.h>
--#include <sys/types.h>
--#include <stdlib.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--
--
--static struct timeval beginning_of_epoch;
--static io_handler io_handlers;
--
--/* Function: now
-- *
-- * Return: the current time in canonical units: a 64 bit number
-- * where the most significant 32 bits contains the number
-- * of seconds, and the least signficant a count of (1/(2^32))ths
-- * of a second.
-- */
--when now()
--{
-- struct timeval result;
--
-- gettimeofday(&result,0);
-- return((((unsigned long long)result.tv_sec)<<32)|
-- (((unsigned long long)result.tv_usec)<<32)/1000000);
--}
--
--
--/* Function: register_io_handler
-- * Arguments: fd: the file descriptor of interest
-- * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER
-- * function: a function to call when io is available on fd
-- * arg: an opaque correlator to return to the handler
-- * Returns: a pointer to the io_handler structure
-- */
--io_handler register_io_handler(int fd,
-- int type,
-- int (*function)(void *),
-- void *arg)
--{
-- io_handler i=(io_handler)malloc(sizeof(struct io_handler));
-- if ((i->fd=fd)>=0){
-- i->type=type;
-- i->function=function;
-- i->argument=arg;
-- i->disabled=0;
-- i->last=&io_handlers;
-- if ((i->next=io_handlers)) i->next->last=&i->next;
-- io_handlers=i;
-- }
-- return(i);
--}
--
--/* Function: remove_io_handler
-- * Arguments: i: a pointer to the handler to stop servicing
-- *
-- * remove_io_handler() doesn't actually free the handler, due
-- * to reentrancy problems. it just marks the handler for
-- * later cleanup by the blocking function.
-- */
--void remove_io_handler (io_handler i)
--{
-- i->disabled=1;
--}
--
--static void set_flag(io_handler n,fd_set *fds)
--{
-- if (n->type & READ_HANDLER) FD_SET(n->fd, &fds[0]);
-- if (n->type & WRITE_HANDLER) FD_SET(n->fd,&fds[1]);
-- if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, &fds[2]);
--}
--
--
--/* Function: select_timer_block
-- * Arguments: until: an absolute time when the select should return
-- *
-- * This function dispatches the various file descriptors' handler
-- * functions, if the kernel indicates there is io available.
-- */
--void select_timer_block(when until)
--{
-- fd_set fds[3];
-- struct timeval timeout;
-- struct timeval *timeout_pointer;
-- int result;
-- io_handler j;
-- io_handler *k;
--
-- /* TODO: loop until the entire interval is expired*/
-- if (until){
-- when interval=until-now();
-- timeout.tv_sec=(interval>>32);
-- timeout.tv_usec=((interval<<32)/1000000)>>32;
-- timeout_pointer=&timeout;
-- } else timeout_pointer=0;
--
-- FD_ZERO(&fds[0]);
-- FD_ZERO(&fds[1]);
-- FD_ZERO(&fds[2]);
-- for (k=&io_handlers;*k;){
-- if ((*k)->disabled){
-- j=*k;
-- *k=(*k)->next;
-- free(j);
-- }
-- if (*k) {
-- set_flag(*k,fds);
-- k=&(*k)->next;
-- }
-- }
--
-- result=select(FD_SETSIZE, &fds[0], &fds[1], &fds[2], timeout_pointer);
--
-- if (result > 0)
-- for (j=io_handlers;j;j=j->next){
-- if (!(j->disabled) &&
-- ((FD_ISSET(j->fd, &fds[0]) && (j->type & READ_HANDLER)) ||
-- (FD_ISSET(j->fd, &fds[1]) && (j->type & WRITE_HANDLER)) ||
-- (FD_ISSET(j->fd, &fds[2]) && (j->type & EXCEPTION_HANDLER)))){
-- if (!(*j->function)(j->argument))
-- j->disabled=1;
-- }
-- }
--}
--
--/* Function: init_unix_timer()
-- * is called to initialize the library
-- */
--void init_unix_timer()
--{
-- io_handlers=0;
-- gettimeofday(&beginning_of_epoch, 0);
-- initialize_timer(select_timer_block);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include <table.h>
--#include <stdlib.h>
--#include <string.h>
--
--
--/* table.c:
-- * a very simple hash table implementation with paramerterizable
-- * comparison and key generation functions. it does resize
-- * in order to accomidate more entries, but never collapses
-- * the table
-- */
--
--static table_entry *table_lookup (table t,void *comparator,
-- unsigned int k,
-- int (*compare_function)(void *, void *),
-- int *success)
--{
-- unsigned int key=k%t->size;
-- table_entry *i;
--
-- for (i=&(t->entries[key]);*i;i=&((*i)->next)){
-- if (compare_function && ((*i)->key==k))
-- if ((*t->compare_function)((*i)->value,comparator)){
-- *success=1;
-- return(i);
-- }
-- }
-- *success=0;
-- return(&(t->entries[key]));
--}
--
--
--static void resize_table(table t, int size)
--{
-- int old_size=t->size;
-- table_entry *old_entries=t->entries;
-- int i;
-- table_entry j,n;
-- table_entry *position;
-- int success;
--
-- t->size=size;
-- t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size);
-- memset(t->entries,0,sizeof(table_entry)*t->size);
--
-- for (i=0;i<old_size;i++)
-- for (j=old_entries[i];j;j=n){
-- n=j->next;
-- position=table_lookup(t,0,j->key,0,&success);
-- j->next= *position;
-- *position=j;
-- }
-- free(old_entries);
--}
--
--
--/* Function: key_from_int
-- * Arguments: int i: value to compute the key of
-- * Returns: the key
-- */
--unsigned int key_from_int(int i)
--{
-- return(i);
--}
--
--
--/* Function: key_from_string
-- * Arguments: char *s: the null terminated string
-- * to compute the key of
-- * Returns: the key
-- */
--unsigned int key_from_string(char *s)
--{
-- unsigned int result=0;
-- unsigned char *n;
-- int i;
-- if (!s) return(1);
-- for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i;
-- return(result);
--}
--
--
--/* Function: hash_create_table
-- * Arguments: compare_function: a function to compare
-- * a table instance with a correlator
-- * key_function: a function to generate a 32 bit
-- * hash key from a correlator
-- * Returns: a pointer to the new table
-- */
--table hash_create_table (int (*compare_function)(void *, void *),
-- unsigned int (*key_function)(unsigned int *))
--{
-- table new=(table)malloc(sizeof(struct table));
-- memset(new, 0, sizeof(struct table));
--
-- new->compare_function=compare_function;
-- new->key_function=key_function;
-- new->number_of_entries=0;
-- new->size=4;
-- new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size);
-- memset(new->entries,0,sizeof(table_entry)*new->size);
-- return(new);
--}
--
--
--/* Function: hash_table_find
-- * Arguments: t: a table to look in
-- * comparator: a value to access the table entry
-- * Returns: the element references to by comparator, or null
-- */
--void *hash_table_find (table t, void *comparator)
--{
-- int success;
-- table_entry* entry=table_lookup(t,comparator,
-- (*t->key_function)(comparator),
-- t->compare_function,
-- &success);
-- if (success) return((*entry)->value);
-- return(0);
--}
--
--
--/* Function: hash_table_insert
-- * Arguments: t: a table to insert the object
-- * value: the object to put in the table
-- * comparator: the value by which the object
-- * will be addressed
-- * Returns: nothing
-- */
--void hash_table_insert (table t, void *value, void *comparator)
--{
-- int success;
-- unsigned int k=(*t->key_function)(comparator);
-- table_entry *position=table_lookup(t,comparator,k,
-- t->compare_function,&success);
-- table_entry entry;
--
-- if (success) {
-- entry = *position;
-- } else {
-- entry = (table_entry)malloc(sizeof(struct table_entry));
-- memset(entry, 0, sizeof(struct table_entry));
-- entry->next= *position;
-- *position=entry;
-- t->number_of_entries++;
-- }
-- entry->value=value;
-- entry->key=k;
-- if (t->number_of_entries > t->size) resize_table(t,t->size*2);
--}
--
--/* Function: hash_table_remove
-- * Arguments: t: the table to remove the object from
-- * comparator: the index value of the object to remove
-- * Returns:
-- */
--void hash_table_remove (table t, void *comparator)
--{
-- int success;
-- table_entry temp;
-- table_entry *position=table_lookup(t,comparator,
-- (*t->key_function)(comparator),
-- t->compare_function,&success);
-- if(success) {
-- temp=*position;
-- *position=(*position)->next;
-- free(temp); /* the value? */
-- t->number_of_entries--;
-- }
--}
--
--/* Function: hash_iterate_table_entries
-- * Arguments: t: the table to iterate over
-- * handler: a function to call with each element
-- * of the table, along with arg
-- * arg: the opaque object to pass to handler
-- * Returns: nothing
-- */
--void hash_iterate_table_entries(table t,
-- void (*handler)(void *,void *),
-- void *arg)
--{
-- int i;
-- table_entry *j,*next;
--
-- for (i=0;i<t->size;i++)
-- for (j=t->entries+i;*j;j=next){
-- next=&((*j)->next);
-- (*handler)(arg,(*j)->value);
-- }
--}
--
--/* Function: hash_filter_table_entries
-- * Arguments: t: the table to iterate over
-- *            handler: a function to call with each element
-- *                     of the table, along with arg
-- *            arg: the opaque object to pass to handler
-- * Returns: nothing
-- * Notes: operations on the table inside handler are not safe
-- *
-- * hash_filter_table_entries() calls the handler function for each
-- * item in the table, passing it arg and the item. The handler
-- * returns 1 if the item is to be retained in the table, and 0
-- * if it is to be removed. Removed entries are freed; their values
-- * are not.
-- */
--void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg)
--{
--        int bucket;
--        table_entry *link, doomed;
--
--        for (bucket = 0; bucket < t->size; bucket++) {
--                link = t->entries + bucket;
--                while (*link) {
--                        if ((*handler)(arg, (*link)->value)) {
--                                /* kept: step on to the next link */
--                                link = &((*link)->next);
--                                continue;
--                        }
--                        /* dropped: splice it out and look at the same
--                         * link again, it now holds the successor */
--                        doomed = *link;
--                        *link = doomed->next;
--                        free(doomed);
--                        t->number_of_entries--;
--                }
--        }
--}
--
--/* Function: hash_destroy_table
-- * Arguments: t: the table to free
-- *            thunk: a function to call with each stored value,
-- *                   most likely free(); may be null
-- * Returns: nothing
-- *
-- * Releases every entry, the bucket array and the table itself.
-- */
--void hash_destroy_table(table t,void (*thunk)(void *))
--{
--        table_entry cur, following;
--        int bucket;
--
--        for (bucket = 0; bucket < t->size; bucket++) {
--                cur = t->entries[bucket];
--                while (cur) {
--                        following = cur->next;
--                        if (thunk)
--                                (*thunk)(cur->value);
--                        free(cur);
--                        cur = following;
--                }
--        }
--        free(t->entries);
--        free(t);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#ifndef E_TABLE
--#define E_TABLE
--
--/* One chained element of the hash table. */
--typedef struct table_entry {
-- unsigned int key; /* full hash key, before reduction to a bucket index */
-- void *value; /* the object stored by the caller */
-- struct table_entry *next; /* next entry in the same bucket */
--} *table_entry;
--
--
--/* The table: an array of bucket chains plus the two callbacks used
-- * to hash a comparator and to match it against a stored value. */
--typedef struct table {
-- unsigned int size; /* number of buckets in entries */
-- int number_of_entries; /* number of values currently stored */
-- table_entry *entries; /* array of bucket head pointers */
-- int (*compare_function)(void *, void *); /* called as (stored value, comparator); non-zero means match */
-- unsigned int (*key_function)(unsigned int *); /* called with the comparator, yields its hash key */
--} *table;
--
--/* table.c */
--/* NOTE(review): key_from_int() and key_from_string() do not have the
-- * exact pointer type of the key_function member above; confirm how
-- * callers hand them to hash_create_table(). */
--unsigned int key_from_int(int i);
--unsigned int key_from_string(char *s);
--table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
--void *hash_table_find(table t, void *comparator);
--void hash_table_insert(table t, void *value, void *comparator);
--void hash_table_remove(table t, void *comparator);
--void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg);
--void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg);
--void hash_destroy_table(table t, void (*thunk)(void *));
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* tcpnal.c:
-- This file implements the TCP-based nal by providing glue
-- between the connection service and the generic NAL implementation */
--
--#include <stdlib.h>
--#include <stdio.h>
--#include <stdarg.h>
--#include <unistd.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netinet/in.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--#include <bridge.h>
--#include <ipmap.h>
--#include <connection.h>
--#include <pthread.h>
--#include <errno.h>
--#ifndef __CYGWIN__
--#include <syscall.h>
--#endif
--
--/* Function: tcpnal_send
-- * Arguments: n: pointer to my nal control block
-- *            private: passed through to lib_finalize()
-- *            cookie: passed back to the portals library
-- *            hdr: pointer to the portals header
-- *            type: not used here
-- *            nid: destination node
-- *            pid: destination process
-- *            niov: number of fragments in iov (at most 256)
-- *            iov: fragments holding the body of the message
-- *            offset: offset of the body within iov
-- *            len: length of the body
-- * Returns: PTL_OK on success, PTL_FAIL otherwise
-- *
-- * sends a packet to the peer, after ensuring that a connection exists
-- */
- ptl_err_t tcpnal_send(nal_cb_t *n,
-ptl_err_t tcpnal_send(lib_nal_t *n,
--                      void *private,
--                      lib_msg_t *cookie,
--                      ptl_hdr_t *hdr,
--                      int type,
--                      ptl_nid_t nid,
--                      ptl_pid_t pid,
--                      unsigned int niov,
--                      struct iovec *iov,
--                      size_t offset,
--                      size_t len)
--{
--        connection c;
- bridge b=(bridge)n->nal_data;
- bridge b=(bridge)n->libnal_data;
--        struct iovec tiov[257];
--        static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
--        ptl_err_t rc = PTL_OK;
--        int sysrc;
--        int total;
--        int ntiov;
--        int i;
--
--        if (!(c=force_tcp_connection((manager)b->lower,
--                                     PNAL_IP(nid,b),
--                                     PNAL_PORT(nid,pid),
--                                     b->local)))
--                return(PTL_FAIL);
--
--        /* TODO: these results should be checked. furthermore, provision
--           must be made for the SIGPIPE which is delivered when
--           writing on a tcp socket which has closed underneath
--           the application. there is a linux flag in the sendmsg
--           call which turns off the signalling behaviour, but it's
--           nonstandard */
--
--        LASSERT (niov <= 256);
--
--        /* slot 0 carries the header, the payload fragments follow it */
--        tiov[0].iov_base = hdr;
--        tiov[0].iov_len = sizeof(ptl_hdr_t);
--        ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len);
--
--        /* one lock for all senders keeps whole messages contiguous */
--        pthread_mutex_lock(&send_lock);
--#if 1
--        for (i = total = 0; i < ntiov; i++)
--                total += tiov[i].iov_len;
--
--        sysrc = syscall(SYS_writev, c->fd, tiov, ntiov);
--        if (sysrc != total) {
--                /* report what writev() returned; rc is still PTL_OK here */
--                fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n",
--                         sysrc, total, errno);
--                rc = PTL_FAIL;
--        }
--#else
--        /* only tiov[0..ntiov-1] are filled in */
--        for (i = total = 0; i < ntiov; i++) {
--                rc = send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0);
--
--                if (rc != tiov[i].iov_len) {
--                        fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n",
--                                 rc, tiov[i].iov_len, errno);
--                        rc = PTL_FAIL;
--                        break;
--                }
--                total += rc;
--        }
--#endif
--#if 0
--        fprintf (stderr, "sent %s total %d in %d frags\n",
--                 hdr->type == PTL_MSG_ACK ? "ACK" :
--                 hdr->type == PTL_MSG_PUT ? "PUT" :
--                 hdr->type == PTL_MSG_GET ? "GET" :
--                 hdr->type == PTL_MSG_REPLY ? "REPLY" :
--                 hdr->type == PTL_MSG_HELLO ? "HELLO" : "UNKNOWN",
--                 total, niov + 1);
--#endif
--        pthread_mutex_unlock(&send_lock);
--
--        if (rc == PTL_OK) {
--                /* NB the NAL only calls lib_finalize() if it returns PTL_OK
--                 * from cb_send() */
--                lib_finalize(n, private, cookie, PTL_OK);
--        }
--
--        return(rc);
--}
--
--
--/* Function: tcpnal_recv
- * Arguments: nal_cb_t *nal: pointer to my nal control block
- * Arguments: lib_nal_t *nal: pointer to my nal control block
-- *            void *private: connection pointer passed through
-- *                           lib_parse()
-- *            lib_msg_t *cookie: passed back to portals library
-- *            unsigned int niov: number of fragments in iov
-- *            struct iovec *iov: fragments of the destination buffer
-- *            size_t offset: offset of the destination within iov
-- *            size_t mlen: length of the body
-- *            size_t rlen: length of data in the network
-- * Returns: PTL_OK
-- *
-- * blocking read of the requested data. must drain out the
-- * difference of manipulated and requested lengths from the network
-- */
- ptl_err_t tcpnal_recv(nal_cb_t *n,
-ptl_err_t tcpnal_recv(lib_nal_t *n,
--                      void *private,
--                      lib_msg_t *cookie,
--                      unsigned int niov,
--                      struct iovec *iov,
--                      size_t offset,
--                      size_t mlen,
--                      size_t rlen)
--
--{
--        struct iovec tiov[256];
--        int ntiov;
--        int i;
--
--        if (!niov)
--                goto finalize;
--
--        LASSERT(mlen);
--        LASSERT(rlen);
--        LASSERT(rlen >= mlen);
--
--        ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen);
--
--        /* FIXME
--         * 1. Is this efficient enough? change to use readv() directly?
--         * 2. need check return from read_connection()
--         * - MeiJia
--         */
--        for (i = 0; i < ntiov; i++)
--                read_connection(private, tiov[i].iov_base, tiov[i].iov_len);
--
--finalize:
--        /* FIXME; we always assume success here... */
--        lib_finalize(n, private, cookie, PTL_OK);
--
--        /* "greater than" rather than "not equal": the niov == 0 path
--         * skips the assertions above, so rlen - mlen must not wrap */
--        if (rlen > mlen) {
--                /* drain the unwanted tail through a fixed scratch buffer,
--                 * one piece at a time, so that no allocation can fail */
--                char trash[4096];
--                size_t left = rlen - mlen;
--
--                while (left > 0) {
--                        size_t chunk = left < sizeof(trash) ?
--                                       left : sizeof(trash);
--
--                        /*TODO: check error status*/
--                        read_connection(private, trash, chunk);
--                        left -= chunk;
--                }
--        }
--
--        return(PTL_OK);
--}
--
--
--/* Function: from_connection:
-- * Arguments: a: the bridge this NAL is attached to
-- * d: the connection to read from
-- * Returns: whether or not to continue reading from this connection,
-- * expressed as a 1 to continue, and a 0 to not
-- *
-- * from_connection() is called from the select loop when i/o is
-- * available. It attempts to read the portals header and
-- * pass it to the generic library for processing.
-- */
--static int from_connection(void *a, void *d)
--{
-- connection c = d;
-- bridge b = a;
-- ptl_hdr_t hdr;
--
-- /* a zero return from read_connection() means no header was read */
-- if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
- lib_parse(b->nal_cb, &hdr, c);
- lib_parse(b->lib_nal, &hdr, c);
- /*TODO: check error status*/
-- return(1);
-- }
-- return(0);
--}
--
--
--/* Bridge shutdown hook: hands b->lower, the manager stored by
-- * tcpnal_init(), to shutdown_connections(). */
--static void tcpnal_shutdown(bridge b)
--{
-- shutdown_connections(b->lower);
--}
--
--/* Function: tcpnal_init
-- * Arguments: b: the bridge to attach the TCP NAL to
-- * Returns: PTL_OK on success, or PTL_NAL_FAILED if
-- * init_connections() does not return a manager
-- *
-- * Installs tcpnal_send, tcpnal_recv and tcpnal_shutdown as the
-- * callbacks of the bridge and stores the manager in b->lower.
-- */
--int tcpnal_init(bridge b)
--{
-- manager m;
--
- b->nal_cb->cb_send=tcpnal_send;
- b->nal_cb->cb_recv=tcpnal_recv;
- b->lib_nal->libnal_send=tcpnal_send;
- b->lib_nal->libnal_recv=tcpnal_recv;
-- b->shutdown=tcpnal_shutdown;
--
-- /* from_connection is registered with b as its first argument */
- if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
- b->nal_cb->ni.pid),
- if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
- b->lib_nal->libnal_ni.ni_pid.pid),
-- from_connection,b))){
-- /* TODO: this needs to shut down the
-- newly created junk */
-- return(PTL_NAL_FAILED);
-- }
-- /* XXX cfs hack */
- b->nal_cb->ni.pid=0;
-// b->lib_nal->libnal_ni.ni_pid.pid=0;
-- b->lower=m;
-- return(PTL_OK);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--/* guard against double inclusion, in the same way table.h does */
--#ifndef E_PQTIMER
--#define E_PQTIMER
--
--/* TODO: make this an explicit type when they become available */
--typedef unsigned long long when;
--
--/* A registered timer.
-- * NOTE(review): the field meanings below are read off the names and
-- * the prototypes only; confirm against the implementation. */
--typedef struct timer {
--        void (*function)(void *);       /* callback, takes one opaque pointer */
--        void *arg;                      /* presumably the pointer handed to function */
--        when w;                         /* presumably the expiry time */
--        int interval;
--        int disable;
--} *timer;
--
--timer register_timer(when, void (*f)(void *), void *a);
--void remove_timer(timer t);
--void timer_loop(void);
--void initialize_timer(void);
--void register_thunk(void (*f)(void *),void *a);
--
--
--/* 2^32; presumably the number of `when' ticks per second -- confirm */
--#define HZ 0x100000000ull
--
--#endif /* E_PQTIMER */
--
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--/* Shorthand names for fixed-width unsigned integers.
-- * NOTE(review): uint32 is spelled unsigned long, which is 64 bits
-- * wide on LP64 platforms; confirm which targets this must build for
-- * before relying on the width the name promises. */
--typedef unsigned short uint16;
--typedef unsigned long uint32;
--typedef unsigned long long uint64;
--typedef unsigned char uint8;
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include <table.h>
--#include <stdlib.h>
--#include <string.h>
--
--
--/* table.c:
-- * a very simple hash table implementation with parameterizable
-- * comparison and key generation functions. it does resize
-- * in order to accommodate more entries, but never collapses
-- * the table
-- */
--
--/* Function: table_lookup
-- * Arguments: t: the table to search
-- *            comparator: the value identifying the wanted entry
-- *            k: the full hash key of comparator
-- *            compare_function: match predicate, called as
-- *                              (stored value, comparator); when
-- *                              null nothing matches
-- *            success: set to 1 if an entry matched, to 0 otherwise
-- * Returns: on a match, the address of the link that points at the
-- *          matching entry; otherwise the address of the head pointer
-- *          of the bucket selected by k
-- */
--static table_entry *table_lookup (table t,void *comparator,
--                                  unsigned int k,
--                                  int (*compare_function)(void *, void *),
--                                  int *success)
--{
--        unsigned int key = k % t->size;
--        table_entry *i;
--
--        for (i = &(t->entries[key]); *i; i = &((*i)->next)) {
--                /* call the predicate that was tested for null, not
--                 * t->compare_function, which may be a different one */
--                if (compare_function && ((*i)->key == k) &&
--                    (*compare_function)((*i)->value, comparator)) {
--                        *success = 1;
--                        return(i);
--                }
--        }
--        *success = 0;
--        return(&(t->entries[key]));
--}
--
--
--/* Function: resize_table
-- * Arguments: t: the table to rehash
-- *            size: the new number of buckets
-- * Returns: nothing
-- *
-- * Moves every entry into a freshly allocated bucket array of the
-- * requested size, pushing each one onto the head of its new bucket.
-- * If size is not positive or the array cannot be allocated the table
-- * is left exactly as it was.
-- */
--static void resize_table(table t, int size)
--{
--        int old_size = t->size;
--        table_entry *old_entries = t->entries;
--        table_entry *new_entries;
--        table_entry *position;
--        table_entry j, n;
--        int i;
--
--        /* a table with no buckets would divide by zero on lookup */
--        if (size <= 0)
--                return;
--
--        /* allocate before touching t so a failure leaves it usable */
--        new_entries = (table_entry *)calloc(size, sizeof(table_entry));
--        if (new_entries == NULL)
--                return;
--
--        t->size = size;
--        t->entries = new_entries;
--
--        for (i = 0; i < old_size; i++)
--                for (j = old_entries[i]; j; j = n) {
--                        n = j->next;
--                        /* head of the bucket the key now hashes to */
--                        position = &(t->entries[j->key % t->size]);
--                        j->next = *position;
--                        *position = j;
--                }
--        free(old_entries);
--}
--
--
--/* Function: key_from_int
-- * Arguments: int i: value to compute the key of
-- * Returns: the key, which is i itself converted to unsigned
-- */
--unsigned int key_from_int(int i)
--{
--        unsigned int key = i;
--
--        return key;
--}
--
--
--/* Function: key_from_string
-- * Arguments: char *s: the null terminated string
-- *                     to compute the key of
-- * Returns: the key; 1 for a null pointer, 0 for the empty string
-- *
-- * Every byte is mixed in as (byte * 57) xor (byte * position),
-- * positions counting from zero.
-- */
--unsigned int key_from_string(char *s)
--{
--        unsigned int hash = 0;
--        unsigned char *p;
--        int pos = 0;
--
--        if (!s)
--                return(1);
--
--        p = (unsigned char *)s;
--        while (*p) {
--                hash ^= (*p * 57) ^ *p * pos;
--                p++;
--                pos++;
--        }
--        return(hash);
--}
--
--
--/* Function: hash_create_table
-- * Arguments: compare_function: a function to compare
-- *                              a table instance with a correlator
-- *            key_function: a function to generate a 32 bit
-- *                          hash key from a correlator
-- * Returns: a pointer to the new table, which starts out with four
-- *          empty buckets, or null if memory could not be allocated
-- */
--table hash_create_table (int (*compare_function)(void *, void *),
--                         unsigned int (*key_function)(unsigned int *))
--{
--        table new = (table)calloc(1, sizeof(struct table));
--
--        if (new == NULL)
--                return(NULL);
--
--        new->compare_function = compare_function;
--        new->key_function = key_function;
--        new->number_of_entries = 0;
--        new->size = 4;
--        new->entries = (table_entry *)calloc(new->size, sizeof(table_entry));
--        if (new->entries == NULL) {
--                /* do not hand out a table without buckets */
--                free(new);
--                return(NULL);
--        }
--        return(new);
--}
--
--
--/* Function: hash_table_find
-- * Arguments: t: a table to look in
-- *            comparator: a value to access the table entry
-- * Returns: the value referenced by comparator, or null when no
-- *          entry matches
-- */
--void *hash_table_find (table t, void *comparator)
--{
--        int found;
--        unsigned int k = (*t->key_function)(comparator);
--        table_entry *slot = table_lookup(t, comparator, k,
--                                         t->compare_function, &found);
--
--        if (!found)
--                return(0);
--        return((*slot)->value);
--}
--
--
--/* Function: hash_table_insert
-- * Arguments: t: the table to insert the object into
-- *            value: the object to put in the table
-- *            comparator: the value by which the object
-- *                        will be addressed
-- * Returns: nothing
-- *
-- * If an entry matching comparator already exists its value is
-- * replaced in place.  Otherwise a new entry is linked in at the
-- * head of its bucket and the table is doubled once the number of
-- * entries exceeds the number of buckets.  If the new entry cannot
-- * be allocated the table is left unchanged.
-- */
--void hash_table_insert (table t, void *value, void *comparator)
--{
--        int success;
--        unsigned int k = (*t->key_function)(comparator);
--        table_entry *position = table_lookup(t, comparator, k,
--                                             t->compare_function, &success);
--        table_entry entry;
--
--        if (success) {
--                entry = *position;
--        } else {
--                /* calloc() hands back a zeroed entry; give up instead of
--                 * writing through NULL when memory is exhausted */
--                entry = (table_entry)calloc(1, sizeof(struct table_entry));
--                if (entry == NULL)
--                        return;
--                entry->next = *position;
--                *position = entry;
--                t->number_of_entries++;
--        }
--        entry->value = value;
--        entry->key = k;
--        if (t->number_of_entries > t->size)
--                resize_table(t, t->size * 2);
--}
--
--/* Function: hash_table_remove
-- * Arguments: t: the table to remove the object from
-- *            comparator: the index value of the object to remove
-- * Returns: nothing
-- *
-- * Unlinks and frees the entry matching comparator, if there is one.
-- * Only the entry itself is freed; the stored value is left alone.
-- */
--void hash_table_remove (table t, void *comparator)
--{
--        int found;
--        unsigned int k = (*t->key_function)(comparator);
--        table_entry *slot = table_lookup(t, comparator, k,
--                                         t->compare_function, &found);
--        table_entry victim;
--
--        if (!found)
--                return;
--
--        victim = *slot;
--        *slot = victim->next;
--        free(victim); /* the value? */
--        t->number_of_entries--;
--}
--
--/* Function: hash_iterate_table_entries
-- * Arguments: t: the table to iterate over
-- *            handler: a function to call with each element
-- *                     of the table, along with arg
-- *            arg: the opaque object to pass to handler
-- * Returns: nothing
-- *
-- * handler is invoked as handler(arg, value) once per stored value,
-- * bucket by bucket.
-- */
--void hash_iterate_table_entries(table t,
--                                void (*handler)(void *,void *),
--                                void *arg)
--{
--        int bucket;
--        table_entry *link, *following;
--
--        for (bucket = 0; bucket < t->size; bucket++) {
--                link = t->entries + bucket;
--                while (*link) {
--                        /* remember where to go next before calling out */
--                        following = &((*link)->next);
--                        (*handler)(arg, (*link)->value);
--                        link = following;
--                }
--        }
--}
--
--/* Function: hash_filter_table_entries
-- * Arguments: t: the table to iterate over
-- *            handler: a function to call with each element
-- *                     of the table, along with arg
-- *            arg: the opaque object to pass to handler
-- * Returns: nothing
-- * Notes: operations on the table inside handler are not safe
-- *
-- * hash_filter_table_entries() calls the handler function for each
-- * item in the table, passing it arg and the item. The handler
-- * returns 1 if the item is to be retained in the table, and 0
-- * if it is to be removed. Removed entries are freed; their values
-- * are not.
-- */
--void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg)
--{
--        int bucket;
--        table_entry *link, doomed;
--
--        for (bucket = 0; bucket < t->size; bucket++) {
--                link = t->entries + bucket;
--                while (*link) {
--                        if ((*handler)(arg, (*link)->value)) {
--                                /* kept: step on to the next link */
--                                link = &((*link)->next);
--                                continue;
--                        }
--                        /* dropped: splice it out and look at the same
--                         * link again, it now holds the successor */
--                        doomed = *link;
--                        *link = doomed->next;
--                        free(doomed);
--                        t->number_of_entries--;
--                }
--        }
--}
--
--/* Function: hash_destroy_table
-- * Arguments: t: the table to free
-- *            thunk: a function to call with each stored value,
-- *                   most likely free(); may be null
-- * Returns: nothing
-- *
-- * Releases every entry, the bucket array and the table itself.
-- */
--void hash_destroy_table(table t,void (*thunk)(void *))
--{
--        table_entry cur, following;
--        int bucket;
--
--        for (bucket = 0; bucket < t->size; bucket++) {
--                cur = t->entries[bucket];
--                while (cur) {
--                        following = cur->next;
--                        if (thunk)
--                                (*thunk)(cur->value);
--                        free(cur);
--                        cur = following;
--                }
--        }
--        free(t->entries);
--        free(t);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--#ifndef E_TABLE
--#define E_TABLE
--
--/* One chained element of the hash table. */
--typedef struct table_entry {
-- unsigned int key; /* full hash key, before reduction to a bucket index */
-- void *value; /* the object stored by the caller */
-- struct table_entry *next; /* next entry in the same bucket */
--} *table_entry;
--
--
--/* The table: an array of bucket chains plus the two callbacks used
-- * to hash a comparator and to match it against a stored value. */
--typedef struct table {
-- unsigned int size; /* number of buckets in entries */
-- int number_of_entries; /* number of values currently stored */
-- table_entry *entries; /* array of bucket head pointers */
-- int (*compare_function)(void *, void *); /* called as (stored value, comparator); non-zero means match */
-- unsigned int (*key_function)(unsigned int *); /* called with the comparator, yields its hash key */
--} *table;
--
--/* table.c */
--/* NOTE(review): key_from_int() and key_from_string() do not have the
-- * exact pointer type of the key_function member above; confirm how
-- * callers hand them to hash_create_table(). */
--unsigned int key_from_int(int i);
--unsigned int key_from_string(char *s);
--table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
--void *hash_table_find(table t, void *comparator);
--void hash_table_insert(table t, void *value, void *comparator);
--void hash_table_remove(table t, void *comparator);
--void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg);
--void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg);
--void hash_destroy_table(table t, void (*thunk)(void *));
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2003 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.lustre.org.
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--/* tcpnal.c:
-- This file implements the TCP-based nal by providing glue
-- between the connection service and the generic NAL implementation */
--
--#include <stdlib.h>
--#include <stdio.h>
--#include <stdarg.h>
--#include <unistd.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netinet/in.h>
--#include <pqtimer.h>
--#include <dispatch.h>
--#include <bridge.h>
--#include <ipmap.h>
--#include <connection.h>
--#include <pthread.h>
--#include <errno.h>
--#ifndef __CYGWIN__
--#include <syscall.h>
--#endif
--
--/* Function: tcpnal_send
-- * Arguments: n: pointer to my nal control block
-- *            private: passed through to lib_finalize()
-- *            cookie: passed back to the portals library
-- *            hdr: pointer to the portals header
-- *            type: not used here
-- *            nid: destination node
-- *            pid: destination process
-- *            niov: number of fragments in iov (at most 256)
-- *            iov: fragments holding the body of the message
-- *            offset: offset of the body within iov
-- *            len: length of the body
-- * Returns: PTL_OK on success, PTL_FAIL otherwise
-- *
-- * sends a packet to the peer, after ensuring that a connection exists
-- */
- ptl_err_t tcpnal_send(nal_cb_t *n,
-ptl_err_t tcpnal_send(lib_nal_t *n,
--                      void *private,
--                      lib_msg_t *cookie,
--                      ptl_hdr_t *hdr,
--                      int type,
--                      ptl_nid_t nid,
--                      ptl_pid_t pid,
--                      unsigned int niov,
--                      struct iovec *iov,
--                      size_t offset,
--                      size_t len)
--{
--        connection c;
- bridge b=(bridge)n->nal_data;
- bridge b=(bridge)n->libnal_data;
--        struct iovec tiov[257];
--        static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
--        ptl_err_t rc = PTL_OK;
--        int sysrc;
--        int total;
--        int ntiov;
--        int i;
--
--        if (!(c=force_tcp_connection((manager)b->lower,
--                                     PNAL_IP(nid,b),
--                                     PNAL_PORT(nid,pid),
--                                     b->local)))
--                return(PTL_FAIL);
--
--        /* TODO: these results should be checked. furthermore, provision
--           must be made for the SIGPIPE which is delivered when
--           writing on a tcp socket which has closed underneath
--           the application. there is a linux flag in the sendmsg
--           call which turns off the signalling behaviour, but it's
--           nonstandard */
--
--        LASSERT (niov <= 256);
--
--        /* slot 0 carries the header, the payload fragments follow it */
--        tiov[0].iov_base = hdr;
--        tiov[0].iov_len = sizeof(ptl_hdr_t);
--        ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len);
--
--        /* one lock for all senders keeps whole messages contiguous */
--        pthread_mutex_lock(&send_lock);
--#if 1
--        for (i = total = 0; i < ntiov; i++)
--                total += tiov[i].iov_len;
--
--        sysrc = syscall(SYS_writev, c->fd, tiov, ntiov);
--        if (sysrc != total) {
--                /* report what writev() returned; rc is still PTL_OK here */
--                fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n",
--                         sysrc, total, errno);
--                rc = PTL_FAIL;
--        }
--#else
--        /* only tiov[0..ntiov-1] are filled in */
--        for (i = total = 0; i < ntiov; i++) {
--                rc = send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0);
--
--                if (rc != tiov[i].iov_len) {
--                        fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n",
--                                 rc, tiov[i].iov_len, errno);
--                        rc = PTL_FAIL;
--                        break;
--                }
--                total += rc;
--        }
--#endif
--#if 0
--        fprintf (stderr, "sent %s total %d in %d frags\n",
--                 hdr->type == PTL_MSG_ACK ? "ACK" :
--                 hdr->type == PTL_MSG_PUT ? "PUT" :
--                 hdr->type == PTL_MSG_GET ? "GET" :
--                 hdr->type == PTL_MSG_REPLY ? "REPLY" :
--                 hdr->type == PTL_MSG_HELLO ? "HELLO" : "UNKNOWN",
--                 total, niov + 1);
--#endif
--        pthread_mutex_unlock(&send_lock);
--
--        if (rc == PTL_OK) {
--                /* NB the NAL only calls lib_finalize() if it returns PTL_OK
--                 * from cb_send() */
--                lib_finalize(n, private, cookie, PTL_OK);
--        }
--
--        return(rc);
--}
--
--
--/* Function: tcpnal_recv
- * Arguments: nal_cb_t *nal: pointer to my nal control block
- * Arguments: lib_nal_t *nal: pointer to my nal control block
-- *            void *private: connection pointer passed through
-- *                           lib_parse()
-- *            lib_msg_t *cookie: passed back to portals library
-- *            unsigned int niov: number of fragments in iov
-- *            struct iovec *iov: fragments of the destination buffer
-- *            size_t offset: offset of the destination within iov
-- *            size_t mlen: length of the body
-- *            size_t rlen: length of data in the network
-- * Returns: PTL_OK
-- *
-- * blocking read of the requested data. must drain out the
-- * difference of manipulated and requested lengths from the network
-- */
- ptl_err_t tcpnal_recv(nal_cb_t *n,
-ptl_err_t tcpnal_recv(lib_nal_t *n,
--                      void *private,
--                      lib_msg_t *cookie,
--                      unsigned int niov,
--                      struct iovec *iov,
--                      size_t offset,
--                      size_t mlen,
--                      size_t rlen)
--
--{
--        struct iovec tiov[256];
--        int ntiov;
--        int i;
--
--        if (!niov)
--                goto finalize;
--
--        LASSERT(mlen);
--        LASSERT(rlen);
--        LASSERT(rlen >= mlen);
--
--        ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen);
--
--        /* FIXME
--         * 1. Is this efficient enough? change to use readv() directly?
--         * 2. need check return from read_connection()
--         * - MeiJia
--         */
--        for (i = 0; i < ntiov; i++)
--                read_connection(private, tiov[i].iov_base, tiov[i].iov_len);
--
--finalize:
--        /* FIXME; we always assume success here... */
--        lib_finalize(n, private, cookie, PTL_OK);
--
--        /* "greater than" rather than "not equal": the niov == 0 path
--         * skips the assertions above, so rlen - mlen must not wrap */
--        if (rlen > mlen) {
--                /* drain the unwanted tail through a fixed scratch buffer,
--                 * one piece at a time, so that no allocation can fail */
--                char trash[4096];
--                size_t left = rlen - mlen;
--
--                while (left > 0) {
--                        size_t chunk = left < sizeof(trash) ?
--                                       left : sizeof(trash);
--
--                        /*TODO: check error status*/
--                        read_connection(private, trash, chunk);
--                        left -= chunk;
--                }
--        }
--
--        return(PTL_OK);
--}
--
--
--/* Function: from_connection:
-- * Arguments: a: the bridge this NAL is attached to
-- * d: the connection to read from
-- * Returns: whether or not to continue reading from this connection,
-- * expressed as a 1 to continue, and a 0 to not
-- *
-- * from_connection() is called from the select loop when i/o is
-- * available. It attempts to read the portals header and
-- * pass it to the generic library for processing.
-- */
--static int from_connection(void *a, void *d)
--{
-- connection c = d;
-- bridge b = a;
-- ptl_hdr_t hdr;
--
-- /* a zero return from read_connection() means no header was read */
-- if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
- lib_parse(b->nal_cb, &hdr, c);
- lib_parse(b->lib_nal, &hdr, c);
- /*TODO: check error status*/
-- return(1);
-- }
-- return(0);
--}
--
--
--/* Bridge shutdown hook: hands b->lower, the manager stored by
-- * tcpnal_init(), to shutdown_connections(). */
--static void tcpnal_shutdown(bridge b)
--{
-- shutdown_connections(b->lower);
--}
--
--/* Function: tcpnal_init
-- * Arguments: b: the bridge to attach the TCP NAL to
-- * Returns: PTL_OK on success, or PTL_NAL_FAILED if
-- * init_connections() does not return a manager
-- *
-- * Installs tcpnal_send, tcpnal_recv and tcpnal_shutdown as the
-- * callbacks of the bridge and stores the manager in b->lower.
-- */
--int tcpnal_init(bridge b)
--{
-- manager m;
--
- b->nal_cb->cb_send=tcpnal_send;
- b->nal_cb->cb_recv=tcpnal_recv;
- b->lib_nal->libnal_send=tcpnal_send;
- b->lib_nal->libnal_recv=tcpnal_recv;
-- b->shutdown=tcpnal_shutdown;
--
-- /* from_connection is registered with b as its first argument */
- if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid,
- b->nal_cb->ni.pid),
- if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
- b->lib_nal->libnal_ni.ni_pid.pid),
-- from_connection,b))){
-- /* TODO: this needs to shut down the
-- newly created junk */
-- return(PTL_NAL_FAILED);
-- }
-- /* XXX cfs hack */
- b->nal_cb->ni.pid=0;
-// b->lib_nal->libnal_ni.ni_pid.pid=0;
-- b->lower=m;
-- return(PTL_OK);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- * Copyright (c) 2002 Eric Hoffman
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--/* guard against double inclusion, in the same way table.h does */
--#ifndef E_PQTIMER
--#define E_PQTIMER
--
--/* TODO: make this an explicit type when they become available */
--typedef unsigned long long when;
--
--/* A registered timer.
-- * NOTE(review): the field meanings below are read off the names and
-- * the prototypes only; confirm against the implementation. */
--typedef struct timer {
--        void (*function)(void *);       /* callback, takes one opaque pointer */
--        void *arg;                      /* presumably the pointer handed to function */
--        when w;                         /* presumably the expiry time */
--        int interval;
--        int disable;
--} *timer;
--
--timer register_timer(when, void (*f)(void *), void *a);
--void remove_timer(timer t);
--void timer_loop(void);
--void initialize_timer(void);
--void register_thunk(void (*f)(void *),void *a);
--
--
--/* 2^32; presumably the number of `when' ticks per second -- confirm */
--#define HZ 0x100000000ull
--
--#endif /* E_PQTIMER */
--
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2002 Cray Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
-- */
--
--/* Shorthand names for fixed-width unsigned integers.
-- * NOTE(review): uint32 is spelled unsigned long, which is 64 bits
-- * wide on LP64 platforms; confirm which targets this must build for
-- * before relying on the width the name promises. */
--typedef unsigned short uint16;
--typedef unsigned long uint32;
--typedef unsigned long long uint64;
--typedef unsigned char uint8;
+++ /dev/null
--Makefile
--Makefile.in
--acceptor
--debugctl
--ptlctl
--.deps
--routerstat
--wirecheck
--gmnalnid
--.*.cmd
+++ /dev/null
--# Copyright (C) 2001 Cluster File Systems, Inc.
--#
--# This code is issued under the GNU General Public License.
--# See the file COPYING in this distribution
--
--## $(srcdir)/../ for <portals/*.h>, ../../ for generated <config.h>
--#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include
--#LINK = $(CC) -o $@
--
--if LIBLUSTRE
--noinst_LIBRARIES = libuptlctl.a
- libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
-endif
-
-libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c
--libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS)
--libuptlctl_a_CFLAGS = $(LLCFLAGS)
- endif
--
- if UTILS
- sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck gmnalnid
-sbin_PROGRAMS = debugctl
-
--lib_LIBRARIES = libptlctl.a
-
-libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
-
-if UTILS
-if !CRAY_PORTALS
-sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck gmnalnid
-endif
--endif
--
--acceptor_SOURCES = acceptor.c
--acceptor_LDADD = $(LIBWRAP)
--
--wirecheck_SOURCES = wirecheck.c
-
- libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
--
--gmnalnid_SOURCES = gmnalnid.c
--
--ptlctl_SOURCES = ptlctl.c
--ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
--ptlctl_DEPENDENCIES = libptlctl.a
-
-routerstat_SOURCES = routerstat.c
--
--debugctl_SOURCES = debugctl.c
--debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
--debugctl_DEPENDENCIES = libptlctl.a
--
- routerstat_SOURCES = routerstat.c
+++ /dev/null
--include $(src)/../Kernelenv
--
--host-progs := acceptor ptlctl
--always := $(host-progs)
--
--ptlctl-objs := ptlctl.o $(PTLCTLOBJS)
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#include <stdio.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netinet/tcp.h>
--#include <netdb.h>
--#include <stdlib.h>
--#include <string.h>
--#include <fcntl.h>
--#include <sys/ioctl.h>
--#include <unistd.h>
--#include <syslog.h>
--#include <errno.h>
--#ifdef HAVE_LIBWRAP
--#include <arpa/inet.h>
--#include <netinet/in.h>
--#include <tcpd.h>
--#endif
--
--#include <portals/api-support.h>
--#include <portals/list.h>
--#include <portals/lib-types.h>
--#include <portals/socknal.h>
--
--/* should get this from autoconf somehow */
--#ifndef PIDFILE_DIR
--#define PIDFILE_DIR "/var/run"
--#endif
--
--#define PROGNAME "acceptor"
--
--#ifdef HAVE_LIBWRAP
--/* needed because libwrap declares these as externs */
--int allow_severity = LOG_INFO;
--int deny_severity = LOG_WARNING;
--#endif
--
--/* Writes the pid of the calling process, followed by a newline, to
-- * PIDFILE_DIR/<name>-<port>.pid.  A file that cannot be opened for
-- * writing is reported through syslog and otherwise ignored. */
--void create_pidfile(char *name, int port)
--{
--        char path[1024];
--        FILE *out;
--
--        snprintf(path, sizeof(path), "%s/%s-%d.pid",
--                 PIDFILE_DIR, name, port);
--
--        out = fopen(path, "w");
--        if (out == NULL) {
--                syslog(LOG_ERR, "%s: %s\n", path,
--                       strerror(errno));
--                return;
--        }
--
--        fprintf(out, "%d\n", getpid());
--        fclose(out);
--}
--
--/* Returns 1, after complaining on stderr, when the pid file
-- * PIDFILE_DIR/<name>-<port>.pid exists, and 0 when it does not. */
--int pidfile_exists(char *name, int port)
--{
-- char pidfile[1024];
--
-- snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid",
-- PIDFILE_DIR, name, port);
--
-- if (!access(pidfile, F_OK)) {
-- fprintf(stderr, "%s: exists, acceptor already running.\n",
-- pidfile);
-- return (1);
-- }
-- return (0);
- }
-
- /* Parses str as a decimal integer with an optional one-letter
- * g/G, m/M or k/K suffix and stores the value, shifted left by 30,
- * 20 or 10 bits respectively, in *sizep. Returns 0 on success and
- * -1 when no number can be read.
- * NOTE(review): the shifts are done in int and can overflow for
- * large inputs -- confirm the expected range. */
- int
- parse_size (int *sizep, char *str)
- {
- int size;
- char mod[32];
-
- switch (sscanf (str, "%d%1[gGmMkK]", &size, mod))
- {
- default:
- return (-1);
-
- case 1:
- *sizep = size;
- return (0);
-
- case 2:
- switch (*mod)
- {
- case 'g':
- case 'G':
- *sizep = size << 30;
- return (0);
-
- case 'm':
- case 'M':
- *sizep = size << 20;
- return (0);
-
- case 'k':
- case 'K':
- *sizep = size << 10;
- return (0);
-
- default:
- *sizep = size;
- return (0);
- }
- }
--}
--
--/* Logs the peer of an accepted connection through syslog. net_ip is
-- * the peer address as handed to gethostbyaddr(); the peer is named
-- * by h_name when that lookup succeeds and as a dotted quad built from
-- * ntohl(net_ip) otherwise.
-- * NOTE(review): len is an int where getsockopt() is specified to take
-- * a socklen_t pointer -- confirm this is harmless on all targets. */
--void
--show_connection (int fd, __u32 net_ip)
--{
-- struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET);
-- __u32 host_ip = ntohl (net_ip);
- int rxmem = 0;
- int txmem = 0;
- int nonagle = 0;
-- int len;
-- char host[1024];
-
- len = sizeof (txmem);
- if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &len) != 0)
- perror ("Cannot get write buffer size");
-
- len = sizeof (rxmem);
- if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &len) != 0)
- perror ("Cannot get read buffer size");
--
- len = sizeof (nonagle);
- if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &len) != 0)
- perror ("Cannot get nagle");
-
-- if (h == NULL)
-- snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff,
-- (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff);
-- else
-- snprintf (host, sizeof(host), "%s", h->h_name);
--
- syslog (LOG_INFO, "Accepted host: %s snd: %d rcv %d nagle: %s\n",
- host, txmem, rxmem, nonagle ? "disabled" : "enabled");
- syslog (LOG_INFO, "Accepted host: %s\n", host);
--}
--
--void
--usage (char *myname)
--{
- fprintf (stderr, "Usage: %s [-r recv_mem] [-s send_mem] [-n] [-p] [-N nal_id] port\n", myname);
- fprintf (stderr, "Usage: %s [-N nal_id] port\n", myname);
-- exit (1);
--}
--
--int main(int argc, char **argv)
--{
-- int o, fd, rc, port, pfd;
-- struct sockaddr_in srvaddr;
-- int c;
- int rxmem = 0;
- int txmem = 0;
-- int noclose = 0;
- int nonagle = 1;
-- int nal = SOCKNAL;
- int bind_irq = 0;
- int rport;
- int require_privports = 1;
--
- while ((c = getopt (argc, argv, "N:pr:s:nli")) != -1)
- while ((c = getopt (argc, argv, "N:l")) != -1)
-- switch (c)
-- {
- case 'r':
- if (parse_size (&rxmem, optarg) != 0 || rxmem < 0)
- usage (argv[0]);
- break;
-
- case 's':
- if (parse_size (&txmem, optarg) != 0 || txmem < 0)
- usage (argv[0]);
- break;
-
- case 'n':
- nonagle = 0;
- break;
-
-- case 'l':
-- noclose = 1;
-- break;
--
- case 'i':
- bind_irq = 1;
- break;
- case 'p':
- require_privports = 0;
- break;
-- case 'N':
- if (parse_size(&nal, optarg) != 0 ||
- if (sscanf(optarg, "%d", &nal) != 1 ||
-- nal < 0 || nal > NAL_MAX_NR)
-- usage(argv[0]);
-- break;
--
-- default:
-- usage (argv[0]);
-- break;
-- }
--
-- if (optind >= argc)
-- usage (argv[0]);
--
-- port = atol(argv[optind++]);
--
-- if (pidfile_exists(PROGNAME, port))
-- exit(1);
--
-- memset(&srvaddr, 0, sizeof(srvaddr));
-- srvaddr.sin_family = AF_INET;
-- srvaddr.sin_port = htons(port);
-- srvaddr.sin_addr.s_addr = INADDR_ANY;
--
-- fd = socket(PF_INET, SOCK_STREAM, 0);
-- if (fd < 0) {
-- perror("opening socket");
-- exit(1);
-- }
--
-- o = 1;
-- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) {
-- perror("Cannot set REUSEADDR socket opt");
-- exit(1);
- }
-
- if (nonagle)
- {
- o = 1;
- rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o));
- if (rc != 0)
- {
- perror ("Cannot disable nagle");
- exit (1);
- }
-- }
--
- if (txmem != 0)
- {
- rc = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, sizeof (txmem));
- if (rc != 0)
- {
- perror ("Cannot set write buffer size");
- exit (1);
- }
- }
-
- if (rxmem != 0)
- {
- rc = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, sizeof (rxmem));
- if (rc != 0)
- {
- perror ("Cannot set read buffer size");
- exit (1);
- }
- }
-
-- rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
-- if ( rc == -1 ) {
-- perror("bind: ");
-- exit(1);
-- }
--
-- if (listen(fd, 127)) {
-- perror("listen: ");
-- exit(1);
-- }
-- fprintf(stderr, "listening on port %d\n", port);
--
-- pfd = open("/dev/portals", O_RDWR);
-- if ( pfd < 0 ) {
-- perror("opening portals device");
-- exit(1);
-- }
--
- rc = daemon(0, noclose);
- rc = daemon(1, noclose);
-- if (rc < 0) {
-- perror("daemon(): ");
-- exit(1);
-- }
--
-- openlog(PROGNAME, LOG_PID, LOG_DAEMON);
-- syslog(LOG_INFO, "started, listening on port %d\n", port);
-- create_pidfile(PROGNAME, port);
--
-- while (1) {
-- struct sockaddr_in clntaddr;
-- int len = sizeof(clntaddr);
-- int cfd;
-- struct portal_ioctl_data data;
-- struct portals_cfg pcfg;
- int privileged = 0;
- char addrstr[INET_ADDRSTRLEN];
--#ifdef HAVE_LIBWRAP
-- struct request_info request;
- char addrstr[INET_ADDRSTRLEN];
--#endif
-
-
-- cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
-- if ( cfd < 0 ) {
-- perror("accept");
-- exit(0);
-- continue;
-- }
--
- rport = ntohs(clntaddr.sin_port);
--#ifdef HAVE_LIBWRAP
-- /* libwrap access control */
-- request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0);
-- sock_host(&request);
-- if (!hosts_access(&request)) {
-- inet_ntop(AF_INET, &clntaddr.sin_addr,
-- addrstr, INET_ADDRSTRLEN);
-- syslog(LOG_WARNING, "Unauthorized access from %s:%hd\n",
- addrstr, rport);
- addrstr, ntohs(clntaddr.sin_port));
-- close (cfd);
-- continue;
-- }
--#endif
-
- if (require_privports && rport >= IPPORT_RESERVED) {
- inet_ntop(AF_INET, &clntaddr.sin_addr,
- addrstr, INET_ADDRSTRLEN);
- syslog(LOG_ERR, "Closing non-privileged connection from %s:%d\n",
- addrstr, rport);
- close(cfd);
- continue;
- }
-
-- show_connection (cfd, clntaddr.sin_addr.s_addr);
--
-- PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD);
-- pcfg.pcfg_nal = nal;
-- pcfg.pcfg_fd = cfd;
- pcfg.pcfg_flags = bind_irq;
-- pcfg.pcfg_misc = SOCKNAL_CONN_NONE; /* == incoming connection */
-
-
-- PORTAL_IOC_INIT(data);
-- data.ioc_pbuf1 = (char*)&pcfg;
-- data.ioc_plen1 = sizeof(pcfg);
-
-
-- if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) {
-- perror("ioctl failed");
-- } else {
-- printf("client registered\n");
-- }
-- rc = close(cfd);
-- if (rc)
-- perror ("close failed");
-- }
--
-- closelog();
-- exit(0);
--
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- * Some day I'll split all of this functionality into a cfs_debug module
-- * of its own. That day is not today.
-- *
-- */
--
--#define __USE_FILE_OFFSET64
-#define _GNU_SOURCE
--
--#include <portals/list.h>
--
--#include <stdio.h>
--#include <netdb.h>
--#include <stdlib.h>
--#include <string.h>
--#include <fcntl.h>
--#include <errno.h>
--#include <unistd.h>
- #include <time.h>
--#ifndef __CYGWIN__
--# include <syscall.h>
--#endif
--
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <sys/ioctl.h>
--#include <sys/stat.h>
--#include <sys/mman.h>
--
--#include <linux/version.h>
--
--#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
--#define BUG() /* workaround for module.h includes */
--#include <linux/module.h>
--#endif
-#include <sys/utsname.h>
--
--#include <portals/api-support.h>
--#include <portals/ptlctl.h>
--#include "parser.h"
-
-#include <time.h>
--
--static char rawbuf[8192];
--static char *buf = rawbuf;
--static int max = 8192;
--//static int g_pfd = -1;
--static int subsystem_mask = ~0;
--static int debug_mask = ~0;
--
--#define MAX_MARK_SIZE 100
--
--static const char *portal_debug_subsystems[] =
-- {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
- "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
- "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", "ibnal",
- NULL};
- "rpc", "mgmt", "portals", "libcfs", "socknal", "qswnal", "pinger",
- "filter", "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd",
- "openibnal", "lmv", "smfs", "cmobd", NULL};
--static const char *portal_debug_masks[] =
-- {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
-- "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
-- "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
- "reada", NULL};
- "reada", "mmap", NULL};
--
--struct debug_daemon_cmd {
-- char *cmd;
-- unsigned int cmdv;
--};
--
--static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = {
-- {"start", DEBUG_DAEMON_START},
-- {"stop", DEBUG_DAEMON_STOP},
-- {0, 0}
--};
--
--static int do_debug_mask(char *name, int enable)
--{
-- int found = 0, i;
--
-- for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
-- if (strcasecmp(name, portal_debug_subsystems[i]) == 0 ||
-- strcasecmp(name, "all_subs") == 0) {
-- printf("%s output from subsystem \"%s\"\n",
-- enable ? "Enabling" : "Disabling",
-- portal_debug_subsystems[i]);
-- if (enable)
-- subsystem_mask |= (1 << i);
-- else
-- subsystem_mask &= ~(1 << i);
-- found = 1;
-- }
-- }
-- for (i = 0; portal_debug_masks[i] != NULL; i++) {
-- if (strcasecmp(name, portal_debug_masks[i]) == 0 ||
-- strcasecmp(name, "all_types") == 0) {
-- printf("%s output of type \"%s\"\n",
-- enable ? "Enabling" : "Disabling",
-- portal_debug_masks[i]);
-- if (enable)
-- debug_mask |= (1 << i);
-- else
-- debug_mask &= ~(1 << i);
-- found = 1;
-- }
-- }
--
-- return found;
--}
--
--int dbg_initialize(int argc, char **argv)
--{
-- return 0;
--}
--
--int jt_dbg_filter(int argc, char **argv)
--{
-- int i;
--
-- if (argc < 2) {
-- fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
-- argv[0]);
-- return 0;
-- }
--
-- for (i = 1; i < argc; i++)
-- if (!do_debug_mask(argv[i], 0))
-- fprintf(stderr, "Unknown subsystem or debug type: %s\n",
-- argv[i]);
-- return 0;
--}
--
--int jt_dbg_show(int argc, char **argv)
--{
-- int i;
--
-- if (argc < 2) {
-- fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
-- argv[0]);
-- return 0;
-- }
--
-- for (i = 1; i < argc; i++)
-- if (!do_debug_mask(argv[i], 1))
-- fprintf(stderr, "Unknown subsystem or debug type: %s\n",
-- argv[i]);
--
-- return 0;
--}
--
--static int applymask(char* procpath, int value)
--{
-- int rc;
-- char buf[64];
-- int len = snprintf(buf, 64, "%d", value);
--
-- int fd = open(procpath, O_WRONLY);
-- if (fd == -1) {
-- fprintf(stderr, "Unable to open %s: %s\n",
-- procpath, strerror(errno));
-- return fd;
-- }
-- rc = write(fd, buf, len+1);
-- if (rc<0) {
-- fprintf(stderr, "Write to %s failed: %s\n",
-- procpath, strerror(errno));
-- return rc;
-- }
-- close(fd);
-- return 0;
--}
-
- extern char *dump_filename;
- extern int dump(int dev_id, int opc, void *buf);
--
--static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
--{
-- if (!dump_filename) {
-- applymask("/proc/sys/portals/subsystem_debug", subs_mask);
-- applymask("/proc/sys/portals/debug", debug_mask);
-- } else {
-- struct portals_debug_ioctl_data data;
--
-- data.hdr.ioc_len = sizeof(data);
-- data.hdr.ioc_version = 0;
-- data.subs = subs_mask;
-- data.debug = debug_mask;
--
-- dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &data);
-- }
-- printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n",
-- subs_mask, debug_mask);
--}
--
--int jt_dbg_list(int argc, char **argv)
--{
-- int i;
--
-- if (argc != 2) {
-- fprintf(stderr, "usage: %s <subs || types>\n", argv[0]);
-- return 0;
-- }
--
-- if (strcasecmp(argv[1], "subs") == 0) {
-- printf("Subsystems: all_subs");
-- for (i = 0; portal_debug_subsystems[i] != NULL; i++)
-- printf(", %s", portal_debug_subsystems[i]);
-- printf("\n");
-- } else if (strcasecmp(argv[1], "types") == 0) {
-- printf("Types: all_types");
-- for (i = 0; portal_debug_masks[i] != NULL; i++)
-- printf(", %s", portal_debug_masks[i]);
-- printf("\n");
-- } else if (strcasecmp(argv[1], "applymasks") == 0) {
-- applymask_all(subsystem_mask, debug_mask);
-- }
-- return 0;
--}
--
--/* all strings nul-terminated; only the struct and hdr need to be freed */
--struct dbg_line {
-- struct ptldebug_header *hdr;
-- char *file;
-- char *fn;
-- char *text;
-- struct list_head chain;
--};
--
--/* nurr. */
--static void list_add_ordered(struct dbg_line *new, struct list_head *head)
--{
-- struct list_head *pos;
- struct dbg_line *curr, *next;
- struct dbg_line *curr;
--
-- list_for_each(pos, head) {
-- curr = list_entry(pos, struct dbg_line, chain);
--
-- if (curr->hdr->ph_sec < new->hdr->ph_sec)
-- continue;
-- if (curr->hdr->ph_sec == new->hdr->ph_sec &&
-- curr->hdr->ph_usec < new->hdr->ph_usec)
-- continue;
--
-- list_add(&new->chain, pos->prev);
-- return;
-- }
-- list_add_tail(&new->chain, head);
--}
--
--static void print_saved_records(struct list_head *list, FILE *out)
--{
-- struct list_head *pos, *tmp;
--
-- list_for_each_safe(pos, tmp, list) {
-- struct dbg_line *line;
-- struct ptldebug_header *hdr;
--
-- line = list_entry(pos, struct dbg_line, chain);
-- list_del(&line->chain);
--
-- hdr = line->hdr;
-- fprintf(out, "%06x:%06x:%u:%u.%06Lu:%u:%u:%u:(%s:%u:%s()) %s",
-- hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id,
-- hdr->ph_sec, (unsigned long long)hdr->ph_usec,
-- hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid,
-- line->file, hdr->ph_line_num, line->fn, line->text);
-- free(line->hdr);
-- free(line);
-- }
--}
--
--static int parse_buffer(FILE *in, FILE *out)
--{
-- struct dbg_line *line;
-- struct ptldebug_header *hdr;
-- char buf[4097], *p;
-- int rc;
-- unsigned long dropped = 0, kept = 0;
- struct list_head chunk_list, *pos;
- struct list_head chunk_list;
--
-- INIT_LIST_HEAD(&chunk_list);
--
-- while (1) {
-- rc = fread(buf, sizeof(hdr->ph_len), 1, in);
-- if (rc <= 0)
-- break;
--
-- hdr = (void *)buf;
-- if (hdr->ph_len == 0)
-- break;
-- if (hdr->ph_len > 4094) {
-- fprintf(stderr, "unexpected large record: %d bytes. "
-- "aborting.\n",
-- hdr->ph_len);
-- break;
-- }
--
-- if (hdr->ph_flags & PH_FLAG_FIRST_RECORD) {
-- print_saved_records(&chunk_list, out);
-- assert(list_empty(&chunk_list));
-- }
--
-- rc = fread(buf + sizeof(hdr->ph_len), 1,
-- hdr->ph_len - sizeof(hdr->ph_len), in);
-- if (rc <= 0)
-- break;
--
-- if (hdr->ph_mask &&
-- (!(subsystem_mask & hdr->ph_subsys) ||
-- (!(debug_mask & hdr->ph_mask)))) {
-- dropped++;
-- continue;
-- }
--
-- line = malloc(sizeof(*line));
-- if (line == NULL) {
-- fprintf(stderr, "malloc failed; printing accumulated "
-- "records and exiting.\n");
-- break;
-- }
--
-- line->hdr = malloc(hdr->ph_len + 1);
-- if (line->hdr == NULL) {
-- fprintf(stderr, "malloc failed; printing accumulated "
-- "records and exiting.\n");
-- break;
-- }
--
-- p = (void *)line->hdr;
-- memcpy(line->hdr, buf, hdr->ph_len);
-- p[hdr->ph_len] = '\0';
--
-- p += sizeof(*hdr);
-- line->file = p;
-- p += strlen(line->file) + 1;
-- line->fn = p;
-- p += strlen(line->fn) + 1;
-- line->text = p;
--
-- list_add_ordered(line, &chunk_list);
-- kept++;
-- }
--
-- print_saved_records(&chunk_list, out);
--
-- printf("Debug log: %lu lines, %lu kept, %lu dropped.\n",
-- dropped + kept, kept, dropped);
-- return 0;
--}
--
--int jt_dbg_debug_kernel(int argc, char **argv)
--{
-- char filename[4096];
-- int rc, raw = 0, fd;
-- FILE *in, *out = stdout;
--
-- if (argc > 3) {
-- fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
-- return 0;
-- }
-- sprintf(filename, "%s.%lu.%u", argc > 1 ? argv[1] : "/tmp/lustre-log",
-- time(NULL), getpid());
--
-- if (argc > 2)
-- raw = atoi(argv[2]);
-- unlink(filename);
--
-- fd = open("/proc/sys/portals/dump_kernel", O_WRONLY);
-- if (fd < 0) {
-- fprintf(stderr, "open(dump_kernel) failed: %s\n",
-- strerror(errno));
-- return 1;
-- }
--
-- rc = write(fd, filename, strlen(filename));
-- if (rc != strlen(filename)) {
-- fprintf(stderr, "write(%s) failed: %s\n", filename,
-- strerror(errno));
-- close(fd);
-- return 1;
-- }
-- close(fd);
--
-- if (raw)
-- return 0;
--
-- in = fopen(filename, "r");
-- if (in == NULL) {
- if (errno == ENOENT) /* no dump file created */
- return 0;
-
-- fprintf(stderr, "fopen(%s) failed: %s\n", filename,
-- strerror(errno));
-- return 1;
-- }
-- if (argc > 1) {
-- out = fopen(argv[1], "w");
-- if (out == NULL) {
-- fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
-- strerror(errno));
- fclose(in);
-- return 1;
-- }
-- }
--
-- rc = parse_buffer(in, out);
- fclose(in);
- if (argc > 1)
- fclose(out);
-- if (rc) {
-- fprintf(stderr, "parse_buffer failed; leaving tmp file %s "
-- "behind.\n", filename);
-- } else {
-- rc = unlink(filename);
-- if (rc)
-- fprintf(stderr, "dumped successfully, but couldn't "
-- "unlink tmp file %s: %s\n", filename,
-- strerror(errno));
-- }
-- return rc;
--}
--
--int jt_dbg_debug_file(int argc, char **argv)
--{
- int fdin,fdout;
-- FILE *in, *out = stdout;
-- if (argc > 3 || argc < 2) {
-- fprintf(stderr, "usage: %s <input> [output]\n", argv[0]);
-- return 0;
-- }
--
- in = fopen(argv[1], "r");
- fdin = open(argv[1], O_RDONLY | O_LARGEFILE);
- if (fdin == -1) {
- fprintf(stderr, "open(%s) failed: %s\n", argv[1],
- strerror(errno));
- return 1;
- }
- in = fdopen(fdin, "r");
-- if (in == NULL) {
-- fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
-- strerror(errno));
- close(fdin);
-- return 1;
-- }
-- if (argc > 2) {
- out = fopen(argv[2], "w");
- fdout = open(argv[2], O_CREAT | O_WRONLY | O_LARGEFILE);
- if (fdout == -1) {
- fprintf(stderr, "open(%s) failed: %s\n", argv[2],
- strerror(errno));
- fclose(in);
- return 1;
- }
- out = fdopen(fdout, "w");
-- if (out == NULL) {
-- fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
-- strerror(errno));
- fclose(in);
- close(fdout);
-- return 1;
-- }
-- }
--
-- return parse_buffer(in, out);
--}
--
- const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n";
- #define DAEMON_FILE "/proc/sys/portals/daemon_file"
-const char debug_daemon_usage[]="usage: debug_daemon {start file [MB]|stop}\n";
--int jt_dbg_debug_daemon(int argc, char **argv)
--{
- int rc = 1, fd;
-
- int rc, fd;
-
-- if (argc <= 1) {
- fprintf(stderr, debug_daemon_usage, argv[0]);
- return 1;
- fprintf(stderr, debug_daemon_usage);
- return 0;
-- }
-
- fd = open(DAEMON_FILE, O_WRONLY);
-
- fd = open("/proc/sys/portals/daemon_file", O_WRONLY);
-- if (fd < 0) {
- fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE,
- fprintf(stderr, "open(daemon_file) failed: %s\n",
-- strerror(errno));
- } else if (strcasecmp(argv[1], "start") == 0) {
- if (argc < 3 || argc > 4 ||
- (argc == 4 && strlen(argv[3]) > 5)) {
- fprintf(stderr, debug_daemon_usage, argv[0]);
- goto out;
- }
-
- if (argc == 4) {
- char size[12] = "size=";
- long sizecheck;
-
- sizecheck = strtoul(argv[3], NULL, 0);
- if (sizecheck < 10 || sizecheck > 20480) {
- fprintf(stderr, "size %s invalid, must be in "
- "the range 20-20480 MB\n", argv[3]);
- } else {
- strncat(size, argv[3], sizeof(size) - 6);
- rc = write(fd, size, strlen(size));
- if (rc != strlen(size)) {
- fprintf(stderr, "set %s failed: %s\n", size, strerror(errno));
- }
- }
- return 1;
- }
-
- if (strcasecmp(argv[1], "start") == 0) {
- if (argc != 3) {
- fprintf(stderr, debug_daemon_usage);
- return 1;
-- }
-
-- rc = write(fd, argv[2], strlen(argv[2]));
-- if (rc != strlen(argv[2])) {
- fprintf(stderr, "start debug_daemon on %s failed: %s\n",
- argv[2], strerror(errno));
- goto out;
- fprintf(stderr, "write(%s) failed: %s\n", argv[2],
- strerror(errno));
- close(fd);
- return 1;
-- }
-
- rc = 0;
-- } else if (strcasecmp(argv[1], "stop") == 0) {
-- rc = write(fd, "stop", 4);
-- if (rc != 4) {
- fprintf(stderr, "stopping debug_daemon failed: %s\n",
- fprintf(stderr, "write(stop) failed: %s\n",
-- strerror(errno));
- goto out;
- close(fd);
- return 1;
-- }
- rc = 0;
-- } else {
- fprintf(stderr, debug_daemon_usage, argv[0]);
- rc = 1;
- fprintf(stderr, debug_daemon_usage);
- return 1;
-- }
-
- out:
-
-- close(fd);
-- return 0;
--}
--
--int jt_dbg_clear_debug_buf(int argc, char **argv)
--{
-- int rc;
-- struct portal_ioctl_data data;
--
-- if (argc != 1) {
-- fprintf(stderr, "usage: %s\n", argv[0]);
-- return 0;
-- }
--
-- memset(&data, 0, sizeof(data));
-- if (portal_ioctl_pack(&data, &buf, max) != 0) {
-- fprintf(stderr, "portal_ioctl_pack failed.\n");
-- return -1;
-- }
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf);
-- if (rc) {
-- fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n",
-- strerror(errno));
-- return -1;
-- }
-- return 0;
--}
--
--int jt_dbg_mark_debug_buf(int argc, char **argv)
--{
-- int rc, max_size = MAX_MARK_SIZE-1;
-- struct portal_ioctl_data data;
-- char *text;
-- time_t now = time(NULL);
--
-- if (argc > 1) {
-- int counter;
-- text = malloc(MAX_MARK_SIZE);
-- strncpy(text, argv[1], max_size);
-- max_size-=strlen(argv[1]);
-- for(counter = 2; (counter < argc) && (max_size > 0) ; counter++){
-- strncat(text, " ", 1);
-- max_size-=1;
-- strncat(text, argv[counter], max_size);
-- max_size-=strlen(argv[counter]);
-- }
-- } else {
-- text = ctime(&now);
-- text[strlen(text) - 1] = '\0'; /* stupid \n */
-- }
-- if (!max_size) {
-- text[MAX_MARK_SIZE - 1] = '\0';
-- }
--
-- memset(&data, 0, sizeof(data));
-- data.ioc_inllen1 = strlen(text) + 1;
-- data.ioc_inlbuf1 = text;
-- if (portal_ioctl_pack(&data, &buf, max) != 0) {
-- fprintf(stderr, "portal_ioctl_pack failed.\n");
-- return -1;
-- }
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf);
-- if (rc) {
-- fprintf(stderr, "IOC_PORTAL_MARK_DEBUG failed: %s\n",
-- strerror(errno));
-- return -1;
-- }
-- return 0;
--}
--
--static struct mod_paths {
-- char *name, *path;
--} mod_paths[] = {
- {"portals", "lustre/portals/libcfs"},
- {"libcfs", "lustre/portals/libcfs"},
- {"portals", "lustre/portals/portals"},
-- {"ksocknal", "lustre/portals/knals/socknal"},
-- {"kptlrouter", "lustre/portals/router"},
-- {"lvfs", "lustre/lvfs"},
-- {"obdclass", "lustre/obdclass"},
-- {"llog_test", "lustre/obdclass"},
-- {"ptlrpc", "lustre/ptlrpc"},
-- {"obdext2", "lustre/obdext2"},
-- {"ost", "lustre/ost"},
-- {"osc", "lustre/osc"},
-- {"mds", "lustre/mds"},
-- {"mdc", "lustre/mdc"},
-- {"llite", "lustre/llite"},
- {"smfs", "lustre/smfs"},
-- {"obdecho", "lustre/obdecho"},
-- {"ldlm", "lustre/ldlm"},
-- {"obdfilter", "lustre/obdfilter"},
-- {"extN", "lustre/extN"},
-- {"lov", "lustre/lov"},
- {"lmv", "lustre/lmv"},
-- {"fsfilt_ext3", "lustre/lvfs"},
-- {"fsfilt_extN", "lustre/lvfs"},
-- {"fsfilt_reiserfs", "lustre/lvfs"},
- {"fsfilt_smfs", "lustre/lvfs"},
- {"fsfilt_ldiskfs", "lustre/lvfs"},
-- {"mds_ext2", "lustre/mds"},
-- {"mds_ext3", "lustre/mds"},
-- {"mds_extN", "lustre/mds"},
-- {"ptlbd", "lustre/ptlbd"},
-- {"mgmt_svc", "lustre/mgmt"},
-- {"mgmt_cli", "lustre/mgmt"},
- {"cobd", "lustre/cobd"},
- {"cmobd", "lustre/cmobd"},
-- {NULL, NULL}
--};
--
- #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- int jt_dbg_modules(int argc, char **argv)
-static int jt_dbg_modules_2_4(int argc, char **argv)
--{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-- struct mod_paths *mp;
-- char *path = "..";
-- char *kernel = "linux";
--
-- if (argc >= 2)
-- path = argv[1];
-- if (argc == 3)
-- kernel = argv[2];
-- if (argc > 3) {
-- printf("%s [path] [kernel]\n", argv[0]);
-- return 0;
-- }
--
-- for (mp = mod_paths; mp->name != NULL; mp++) {
-- struct module_info info;
-- int rc;
-- size_t crap;
-- int query_module(const char *name, int which, void *buf,
-- size_t bufsize, size_t *ret);
--
-- rc = query_module(mp->name, QM_INFO, &info, sizeof(info),
-- &crap);
-- if (rc < 0) {
-- if (errno != ENOENT)
-- printf("query_module(%s) failed: %s\n",
-- mp->name, strerror(errno));
-- } else {
-- printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
-- mp->path, mp->name,
-- info.addr + sizeof(struct module));
-- }
-- }
--
-- return 0;
-#else /* Headers are 2.6-only */
- return -EINVAL;
-#endif
--}
- #else
- int jt_dbg_modules(int argc, char **argv)
-
-static int jt_dbg_modules_2_5(int argc, char **argv)
--{
-- struct mod_paths *mp;
-- char *path = "..";
-- char *kernel = "linux";
-- const char *proc = "/proc/modules";
-- char modname[128], others[128];
-- long modaddr;
-- int rc;
-- FILE *file;
--
-- if (argc >= 2)
-- path = argv[1];
-- if (argc == 3)
-- kernel = argv[2];
-- if (argc > 3) {
-- printf("%s [path] [kernel]\n", argv[0]);
-- return 0;
-- }
--
-- file = fopen(proc, "r");
-- if (!file) {
-- printf("failed open %s: %s\n", proc, strerror(errno));
-- return 0;
-- }
--
-- while ((rc = fscanf(file, "%s %s %s %s %s %lx\n",
-- modname, others, others, others, others, &modaddr)) == 6) {
-- for (mp = mod_paths; mp->name != NULL; mp++) {
-- if (!strcmp(mp->name, modname))
-- break;
-- }
-- if (mp->name) {
-- printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
-- mp->path, mp->name, modaddr);
-- }
-- }
--
-- return 0;
--}
- #endif /* linux 2.5 */
-
-int jt_dbg_modules(int argc, char **argv)
-{
- int rc = 0;
- struct utsname sysinfo;
-
- rc = uname(&sysinfo);
- if (rc) {
- printf("uname() failed: %s\n", strerror(errno));
- return 0;
- }
-
- if (sysinfo.release[2] > '4') {
- return jt_dbg_modules_2_5(argc, argv);
- } else {
- return jt_dbg_modules_2_4(argc, argv);
- }
-
- return 0;
-}
--
--int jt_dbg_panic(int argc, char **argv)
--{
-- int rc;
-- struct portal_ioctl_data data;
--
-- if (argc != 1) {
-- fprintf(stderr, "usage: %s\n", argv[0]);
-- return 0;
-- }
--
-- memset(&data, 0, sizeof(data));
-- if (portal_ioctl_pack(&data, &buf, max) != 0) {
-- fprintf(stderr, "portal_ioctl_pack failed.\n");
-- return -1;
-- }
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PANIC, buf);
-- if (rc) {
-- fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n",
-- strerror(errno));
-- return -1;
-- }
-- return 0;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- * Some day I'll split all of this functionality into a cfs_debug module
-- * of its own. That day is not today.
-- *
-- */
--
--#include <stdio.h>
--#include <stdlib.h>
--#include <portals/api-support.h>
--#include <portals/ptlctl.h>
--#include "parser.h"
--
--
--command_t list[] = {
-- {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"},
-- {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file|stop], control debug daemon to dump debug buffer to a file"},
-- {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"},
-- {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"},
-- {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"},
-- {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"},
-- {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"},
-- {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"},
-- {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"},
-- {"panic", jt_dbg_panic, 0, "cause the kernel to panic"},
-- {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
-- {"help", Parser_help, 0, "help"},
-- {"exit", Parser_quit, 0, "quit"},
-- {"quit", Parser_quit, 0, "quit"},
-- { 0, 0, 0, NULL }
--};
--
--int main(int argc, char **argv)
--{
-- if (dbg_initialize(argc, argv) < 0)
-- exit(2);
--
-- register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
--
-- Parser_init("debugctl > ", list);
-- if (argc > 1)
-- return Parser_execarg(argc - 1, &argv[1], list);
--
-- Parser_commands();
--
-- unregister_ioc_dev(PORTALS_DEV_ID);
-- return 0;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-- *
-- * This file is part of Lustre, http://www.lustre.org/
-- *
-- * This file is free software; you can redistribute it and/or
-- * modify it under the terms of version 2.1 of the GNU Lesser General
-- * Public License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU Lesser General Public License for more details.
-- *
-- * You should have received a copy of the GNU Lesser General Public
-- * License along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- */
--
--#include <stdio.h>
--#include <sys/types.h>
--#include <sys/socket.h>
--#include <netinet/tcp.h>
--#include <netdb.h>
--#include <stdlib.h>
--#include <string.h>
--#include <fcntl.h>
--#include <sys/ioctl.h>
--#include <unistd.h>
--#include <syslog.h>
--#include <errno.h>
--
--#include <portals/api-support.h>
--#include <portals/list.h>
--#include <portals/lib-types.h>
--
--#define GMNAL_IOC_GET_GNID 1
--
--int
--roundup(int len)
--{
-- return((len+7) & (~0x7));
--}
--
--int main(int argc, char **argv)
--{
-- int rc, pfd;
-- struct portal_ioctl_data data;
-- struct portals_cfg pcfg;
-- unsigned int nid = 0, len;
-- char *name = NULL;
-- int c;
--
--
--
-- while ((c = getopt(argc, argv, "n:l")) != -1) {
-- switch(c) {
-- case('n'):
-- name = optarg;
-- break;
-- case('l'):
-- printf("Get local id not implemented yet!\n");
-- exit(-1);
-- default:
-- printf("usage %s -n nodename [-p]\n", argv[0]);
-- }
-- }
--
-- if (!name) {
-- printf("usage %s -n nodename [-p]\n", argv[0]);
-- exit(-1);
-- }
--
--
--
--
-- PCFG_INIT(pcfg, GMNAL_IOC_GET_GNID);
-- pcfg.pcfg_nal = GMNAL;
--
-- /*
-- * set up the inputs
-- */
-- len = strlen(name) + 1;
-- pcfg.pcfg_pbuf1 = malloc(len);
-- strcpy(pcfg.pcfg_pbuf1, name);
-- pcfg.pcfg_plen1 = len;
--
-- /*
-- * set up the outputs
-- */
-- pcfg.pcfg_pbuf2 = (void*)&nid;
-- pcfg.pcfg_plen2 = sizeof(unsigned int*);
--
-- pfd = open("/dev/portals", O_RDWR);
-- if ( pfd < 0 ) {
-- perror("opening portals device");
-- free(pcfg.pcfg_pbuf1);
-- exit(-1);
-- }
--
-- PORTAL_IOC_INIT(data);
-- data.ioc_pbuf1 = (char*)&pcfg;
-- data.ioc_plen1 = sizeof(pcfg);
--
-- rc = ioctl (pfd, IOC_PORTAL_NAL_CMD, &data);
-- if (rc < 0)
-- {
-- perror ("Can't get my NID");
-- }
--
-- free(pcfg.pcfg_pbuf1);
-- close(pfd);
-- printf("%u\n", nid);
-- exit(0);
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#define __USE_FILE_OFFSET64
--
--#include <stdio.h>
--#include <stdlib.h>
--#include <string.h>
--#include <sys/types.h>
--#include <sys/stat.h>
--#include <fcntl.h>
--#include <sys/mman.h>
--#include <sys/ioctl.h>
--#include <errno.h>
--#include <unistd.h>
--
--#include <portals/api-support.h>
--#include <portals/ptlctl.h>
--
--#ifndef __CYGWIN__
-- #include <syscall.h>
--#else
-- #include <windows.h>
-- #include <windef.h>
--#endif
--
--static ioc_handler_t do_ioctl; /* forward ref */
--static ioc_handler_t *current_ioc_handler = &do_ioctl;
--
--struct ioc_dev {
-- const char * dev_name;
-- int dev_fd;
--};
--
--static struct ioc_dev ioc_dev_list[10];
--
--struct dump_hdr {
-- int magic;
-- int dev_id;
- int opc;
- unsigned int opc;
--};
--
--char *dump_filename;
--
--void
--set_ioc_handler (ioc_handler_t *handler)
--{
-- if (handler == NULL)
-- current_ioc_handler = do_ioctl;
-- else
-- current_ioc_handler = handler;
--}
--
--static int
--open_ioc_dev(int dev_id)
--{
-- const char * dev_name;
--
-- if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list))
-- return -EINVAL;
--
-- dev_name = ioc_dev_list[dev_id].dev_name;
-- if (dev_name == NULL) {
-- fprintf(stderr, "unknown device id: %d\n", dev_id);
-- return -EINVAL;
-- }
--
-- if (ioc_dev_list[dev_id].dev_fd < 0) {
-- int fd = open(dev_name, O_RDWR);
--
-- if (fd < 0) {
-- fprintf(stderr, "opening %s failed: %s\n"
-- "hint: the kernel modules may not be loaded\n",
-- dev_name, strerror(errno));
-- return fd;
-- }
-- ioc_dev_list[dev_id].dev_fd = fd;
-- }
--
-- return ioc_dev_list[dev_id].dev_fd;
--}
--
--
--static int
- do_ioctl(int dev_id, int opc, void *buf)
-do_ioctl(int dev_id, unsigned int opc, void *buf)
--{
-- int fd, rc;
--
-- fd = open_ioc_dev(dev_id);
-- if (fd < 0)
-- return fd;
--
-- rc = ioctl(fd, opc, buf);
-- return rc;
--
--}
--
--static FILE *
--get_dump_file()
--{
-- FILE *fp = NULL;
--
-- if (!dump_filename) {
-- fprintf(stderr, "no dump filename\n");
-- } else
-- fp = fopen(dump_filename, "a");
-- return fp;
--}
--
--/*
-- * The dump file should start with a description of which devices are
-- * used, but for now it will assumed whatever app reads the file will
-- * know what to do. */
--int
- dump(int dev_id, int opc, void *buf)
-dump(int dev_id, unsigned int opc, void *buf)
--{
-- FILE *fp;
-- struct dump_hdr dump_hdr;
-- struct portal_ioctl_hdr * ioc_hdr = (struct portal_ioctl_hdr *) buf;
-- int rc;
--
-- printf("dumping opc %x to %s\n", opc, dump_filename);
--
--
-- dump_hdr.magic = 0xdeadbeef;
-- dump_hdr.dev_id = dev_id;
-- dump_hdr.opc = opc;
--
-- fp = get_dump_file();
-- if (fp == NULL) {
-- fprintf(stderr, "%s: %s\n", dump_filename,
-- strerror(errno));
-- return -EINVAL;
-- }
--
-- rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp);
-- if (rc == 1)
-- rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp);
-- fclose(fp);
-- if (rc != 1) {
-- fprintf(stderr, "%s: %s\n", dump_filename,
-- strerror(errno));
-- return -EINVAL;
-- }
--
-- return 0;
--}
--
--/* register a device to send ioctls to. */
--int
--register_ioc_dev(int dev_id, const char * dev_name)
--{
--
-- if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list))
-- return -EINVAL;
--
-- unregister_ioc_dev(dev_id);
--
-- ioc_dev_list[dev_id].dev_name = dev_name;
-- ioc_dev_list[dev_id].dev_fd = -1;
--
-- return dev_id;
--}
--
--void
--unregister_ioc_dev(int dev_id)
--{
--
-- if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list))
-- return;
-- if (ioc_dev_list[dev_id].dev_name != NULL &&
-- ioc_dev_list[dev_id].dev_fd >= 0)
-- close(ioc_dev_list[dev_id].dev_fd);
--
-- ioc_dev_list[dev_id].dev_name = NULL;
-- ioc_dev_list[dev_id].dev_fd = -1;
--}
--
--/* If this file is set, then all ioctl buffers will be
-- appended to the file. */
--int
--set_ioctl_dump(char * file)
--{
-- if (dump_filename)
-- free(dump_filename);
--
-- dump_filename = strdup(file);
-- if (dump_filename == NULL)
-- abort();
--
-- set_ioc_handler(&dump);
-- return 0;
--}
--
--int
- l_ioctl(int dev_id, int opc, void *buf)
-l_ioctl(int dev_id, unsigned int opc, void *buf)
--{
-- return current_ioc_handler(dev_id, opc, buf);
--}
--
--/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer
-- * in the file. For example:
-- *
-- * parse_dump("lctl.dump", l_ioctl);
-- *
-- * Note: if using l_ioctl, then you also need to register_ioc_dev() for
-- * each device used in the dump.
-- */
--int
- parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *))
-parse_dump(char * dump_file, ioc_handler_t ioc_func)
--{
-- int line =0;
-- struct stat st;
-- char *start, *buf, *end;
--#ifndef __CYGWIN__
-- int fd;
--#else
-- HANDLE fd, hmap;
-- DWORD size;
--#endif
--
--#ifndef __CYGWIN__
-- fd = syscall(SYS_open, dump_file, O_RDONLY);
-- if (fd < 0) {
-- fprintf(stderr, "couldn't open %s: %s\n", dump_file,
-- strerror(errno));
-- exit(1);
-- }
--
-- if (fstat(fd, &st)) {
-- perror("stat fails");
-- exit(1);
-- }
--
-- if (st.st_size < 1) {
-- fprintf(stderr, "KML is empty\n");
-- exit(1);
-- }
--
-- start = buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0);
-- end = start + st.st_size;
-- close(fd);
-- if (start == MAP_FAILED) {
-- fprintf(stderr, "can't create file mapping\n");
-- exit(1);
-- }
--#else
-- fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL,
-- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-- size = GetFileSize(fd, NULL);
-- if (size < 1) {
-- fprintf(stderr, "KML is empty\n");
-- exit(1);
-- }
--
-- hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL);
-- start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0);
-- end = buf + size;
-- CloseHandle(fd);
-- if (start == NULL) {
-- fprintf(stderr, "can't create file mapping\n");
-- exit(1);
-- }
--#endif /* __CYGWIN__ */
--
-- while (buf < end) {
-- struct dump_hdr *dump_hdr = (struct dump_hdr *) buf;
-- struct portal_ioctl_hdr * data;
-- char tmp[8096];
-- int rc;
--
-- line++;
--
-- data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr));
-- if (buf + data->ioc_len > end ) {
-- fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf,
-- data->ioc_len, end);
-- return -1;
-- }
--#if 0
-- printf ("dump_hdr: %lx data: %lx\n",
-- (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf);
--
-- printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc,
-- data->ioc_len, data->ioc_version);
--#endif
--
-- memcpy(tmp, data, data->ioc_len);
--
-- rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp);
-- if (rc) {
-- printf("failed: %d\n", rc);
-- exit(1);
-- }
--
-- buf += data->ioc_len + sizeof(*dump_hdr);
-- }
--
--#ifndef __CYGWIN__
-- munmap(start, end - start);
--#else
-- UnmapViewOfFile(start);
-- CloseHandle(hmap);
--#endif
--
-- return 0;
--}
--
--int
--jt_ioc_dump(int argc, char **argv)
--{
-- if (argc > 2) {
-- fprintf(stderr, "usage: %s [hostname]\n", argv[0]);
-- return 0;
-- }
-- printf("setting dumpfile to: %s\n", argv[1]);
--
-- set_ioctl_dump(argv[1]);
-- return 0;
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001 Cluster File Systems, Inc.
-- *
-- * This file is part of Lustre, http://www.sf.net/projects/lustre/
-- *
-- * Lustre is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Lustre is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Lustre; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--#include <stdio.h>
--#include <stdlib.h>
--#include <ctype.h>
--#include <string.h>
--#include <stddef.h>
--#include <unistd.h>
--#include <sys/param.h>
--#include <assert.h>
--
--#ifdef HAVE_LIBREADLINE
--#define READLINE_LIBRARY
--#include <readline/readline.h>
--
--/* completion_matches() is #if 0-ed out in modern glibc */
--#ifndef completion_matches
--# define completion_matches rl_completion_matches
--#endif
--#endif
--
--extern void using_history(void);
--extern void stifle_history(int);
--extern void add_history(char *);
--
--#include "parser.h"
--
--static command_t * top_level; /* Top level of commands, initialized by
-- * InitParser */
--static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */
--static int done; /* Set to 1 if user types exit or quit */
--
--
--/* static functions */
--static char *skipwhitespace(char *s);
--static char *skiptowhitespace(char *s);
--static command_t *find_cmd(char *name, command_t cmds[], char **next);
--static int process(char *s, char **next, command_t *lookup, command_t **result,
-- char **prev);
--static void print_commands(char *str, command_t *table);
--
--static char * skipwhitespace(char * s)
--{
-- char * t;
-- int len;
--
-- len = (int)strlen(s);
-- for (t = s; t <= s + len && isspace(*t); t++);
-- return(t);
--}
--
--
--static char * skiptowhitespace(char * s)
--{
-- char * t;
--
-- for (t = s; *t && !isspace(*t); t++);
-- return(t);
--}
--
--static int line2args(char *line, char **argv, int maxargs)
--{
-- char *arg;
-- int i = 0;
--
-- arg = strtok(line, " \t");
-- if ( arg ) {
-- argv[i] = arg;
-- i++;
-- } else
-- return 0;
--
-- while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) {
-- argv[i] = arg;
-- i++;
-- }
-- return i;
--}
--
--/* find a command -- return it if unique otherwise print alternatives */
--static command_t *Parser_findargcmd(char *name, command_t cmds[])
--{
-- command_t *cmd;
--
-- for (cmd = cmds; cmd->pc_name; cmd++) {
-- if (strcmp(name, cmd->pc_name) == 0)
-- return cmd;
-- }
-- return NULL;
--}
--
--int Parser_execarg(int argc, char **argv, command_t cmds[])
--{
-- command_t *cmd;
--
-- cmd = Parser_findargcmd(argv[0], cmds);
-- if ( cmd ) {
-- int rc = (cmd->pc_func)(argc, argv);
-- if (rc == CMD_HELP)
-- fprintf(stderr, "%s\n", cmd->pc_help);
-- return rc;
-- } else {
-- printf("Try interactive use without arguments or use one of:\n");
-- for (cmd = cmds; cmd->pc_name; cmd++)
-- printf("\"%s\" ", cmd->pc_name);
-- printf("\nas argument.\n");
-- }
-- return -1;
--}
--
--/* returns the command_t * (NULL if not found) corresponding to a
-- _partial_ match with the first token in name. It sets *next to
-- point to the following token. Does not modify *name. */
--static command_t * find_cmd(char * name, command_t cmds[], char ** next)
--{
-- int i, len;
--
-- if (!cmds || !name )
-- return NULL;
--
-- /* This sets name to point to the first non-white space character,
-- and next to the first whitespace after name, len to the length: do
-- this with strtok*/
-- name = skipwhitespace(name);
-- *next = skiptowhitespace(name);
-- len = *next - name;
-- if (len == 0)
-- return NULL;
--
-- for (i = 0; cmds[i].pc_name; i++) {
-- if (strncasecmp(name, cmds[i].pc_name, len) == 0) {
-- *next = skipwhitespace(*next);
-- return(&cmds[i]);
-- }
-- }
-- return NULL;
--}
--
--/* Recursively process a command line string s and find the command
-- corresponding to it. This can be ambiguous, full, incomplete,
-- non-existent. */
--static int process(char *s, char ** next, command_t *lookup,
-- command_t **result, char **prev)
--{
-- *result = find_cmd(s, lookup, next);
-- *prev = s;
--
-- /* non existent */
-- if ( ! *result )
-- return CMD_NONE;
--
-- /* found entry: is it ambigous, i.e. not exact command name and
-- more than one command in the list matches. Note that find_cmd
-- points to the first ambiguous entry */
-- if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) &&
-- find_cmd(s, (*result) + 1, next))
-- return CMD_AMBIG;
--
-- /* found a unique command: component or full? */
-- if ( (*result)->pc_func ) {
-- return CMD_COMPLETE;
-- } else {
-- if ( *next == '\0' ) {
-- return CMD_INCOMPLETE;
-- } else {
-- return process(*next, next, (*result)->pc_sub_cmd, result, prev);
-- }
-- }
--}
--
--#ifdef HAVE_LIBREADLINE
--static command_t * match_tbl; /* Command completion against this table */
--static char * command_generator(const char * text, int state)
--{
-- static int index,
-- len;
-- char *name;
--
-- /* Do we have a match table? */
-- if (!match_tbl)
-- return NULL;
--
-- /* If this is the first time called on this word, state is 0 */
-- if (!state) {
-- index = 0;
-- len = (int)strlen(text);
-- }
--
-- /* Return next name in the command list that paritally matches test */
-- while ( (name = (match_tbl + index)->pc_name) ) {
-- index++;
--
-- if (strncasecmp(name, text, len) == 0) {
-- return(strdup(name));
-- }
-- }
--
-- /* No more matches */
-- return NULL;
--}
--
--/* probably called by readline */
--static char **command_completion(char * text, int start, int end)
--{
-- command_t * table;
-- char * pos;
--
-- match_tbl = top_level;
--
-- for (table = find_cmd(rl_line_buffer, match_tbl, &pos);
-- table; table = find_cmd(pos, match_tbl, &pos))
-- {
--
-- if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd;
-- }
--
-- return completion_matches(text, command_generator);
--}
--#endif
--
--/* take a string and execute the function or print help */
--int execute_line(char * line)
--{
-- command_t *cmd, *ambig;
-- char *prev;
-- char *next, *tmp;
-- char *argv[MAXARGS];
-- int i;
-- int rc = 0;
--
-- switch( process(line, &next, top_level, &cmd, &prev) ) {
-- case CMD_AMBIG:
-- fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
-- while( (ambig = find_cmd(prev, cmd, &tmp)) ) {
-- fprintf(stderr, "%s ", ambig->pc_name);
-- cmd = ambig + 1;
-- }
-- fprintf(stderr, "\n");
-- break;
-- case CMD_NONE:
-- fprintf(stderr, "No such command, type help\n");
-- break;
-- case CMD_INCOMPLETE:
-- fprintf(stderr,
-- "'%s' incomplete command. Use '%s x' where x is one of:\n",
-- line, line);
-- fprintf(stderr, "\t");
-- for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) {
-- fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name);
-- }
-- fprintf(stderr, "\n");
-- break;
-- case CMD_COMPLETE:
-- i = line2args(line, argv, MAXARGS);
-- rc = (cmd->pc_func)(i, argv);
--
-- if (rc == CMD_HELP)
-- fprintf(stderr, "%s\n", cmd->pc_help);
--
-- break;
-- }
--
-- return rc;
--}
--
--int
--noop_fn ()
--{
-- return (0);
--}
--
--/* just in case you're ever in an airplane and discover you
-- forgot to install readline-dev. :) */
--int init_input()
--{
-- int interactive = isatty (fileno (stdin));
--
--#ifdef HAVE_LIBREADLINE
-- using_history();
-- stifle_history(HISTORY);
--
-- if (!interactive)
-- {
-- rl_prep_term_function = (rl_vintfunc_t *)noop_fn;
-- rl_deprep_term_function = (rl_voidfunc_t *)noop_fn;
-- }
--
-- rl_attempted_completion_function = (CPPFunction *)command_completion;
-- rl_completion_entry_function = (void *)command_generator;
--#endif
-- return interactive;
--}
--
--#ifndef HAVE_LIBREADLINE
--#define add_history(s)
--char * readline(char * prompt)
--{
-- char line[2048];
-- int n = 0;
-- if (prompt)
-- printf ("%s", prompt);
-- if (fgets(line, sizeof(line), stdin) == NULL)
-- return (NULL);
-- n = strlen(line);
-- if (n && line[n-1] == '\n')
-- line[n-1] = '\0';
-- return strdup(line);
--}
--#endif
--
--/* this is the command execution machine */
--int Parser_commands(void)
--{
-- char *line, *s;
-- int rc = 0;
-- int interactive;
--
-- interactive = init_input();
--
-- while(!done) {
-- line = readline(interactive ? parser_prompt : NULL);
--
-- if (!line) break;
--
-- s = skipwhitespace(line);
--
-- if (*s) {
-- add_history(s);
-- rc = execute_line(s);
-- }
--
-- free(line);
-- }
-- return rc;
--}
--
--
--/* sets the parser prompt */
--void Parser_init(char * prompt, command_t * cmds)
--{
-- done = 0;
-- top_level = cmds;
-- if (parser_prompt) free(parser_prompt);
-- parser_prompt = strdup(prompt);
--}
--
--/* frees the parser prompt */
--void Parser_exit(int argc, char *argv[])
--{
-- done = 1;
-- free(parser_prompt);
-- parser_prompt = NULL;
--}
--
--/* convert a string to an integer */
--int Parser_int(char *s, int *val)
--{
-- int ret;
--
-- if (*s != '0')
-- ret = sscanf(s, "%d", val);
-- else if (*(s+1) != 'x')
-- ret = sscanf(s, "%o", val);
-- else {
-- s++;
-- ret = sscanf(++s, "%x", val);
-- }
--
-- return(ret);
--}
--
--
--void Parser_qhelp(int argc, char *argv[]) {
--
-- printf("Available commands are:\n");
--
-- print_commands(NULL, top_level);
-- printf("For more help type: help command-name\n");
--}
--
--int Parser_help(int argc, char **argv)
--{
-- char line[1024];
-- char *next, *prev, *tmp;
-- command_t *result, *ambig;
-- int i;
--
-- if ( argc == 1 ) {
-- Parser_qhelp(argc, argv);
-- return 0;
-- }
--
-- line[0]='\0';
-- for ( i = 1 ; i < argc ; i++ ) {
-- strcat(line, argv[i]);
-- }
--
-- switch ( process(line, &next, top_level, &result, &prev) ) {
-- case CMD_COMPLETE:
-- fprintf(stderr, "%s: %s\n",line, result->pc_help);
-- break;
-- case CMD_NONE:
-- fprintf(stderr, "%s: Unknown command.\n", line);
-- break;
-- case CMD_INCOMPLETE:
-- fprintf(stderr,
-- "'%s' incomplete command. Use '%s x' where x is one of:\n",
-- line, line);
-- fprintf(stderr, "\t");
-- for (i = 0; result->pc_sub_cmd[i].pc_name; i++) {
-- fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name);
-- }
-- fprintf(stderr, "\n");
-- break;
-- case CMD_AMBIG:
-- fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
-- while( (ambig = find_cmd(prev, result, &tmp)) ) {
-- fprintf(stderr, "%s ", ambig->pc_name);
-- result = ambig + 1;
-- }
-- fprintf(stderr, "\n");
-- break;
-- }
-- return 0;
--}
--
--
--void Parser_printhelp(char *cmd)
--{
-- char *argv[] = { "help", cmd };
-- Parser_help(2, argv);
--}
--
--/*************************************************************************
-- * COMMANDS *
-- *************************************************************************/
--
--
--static void print_commands(char * str, command_t * table) {
-- command_t * cmds;
-- char buf[80];
--
-- for (cmds = table; cmds->pc_name; cmds++) {
-- if (cmds->pc_func) {
-- if (str) printf("\t%s %s\n", str, cmds->pc_name);
-- else printf("\t%s\n", cmds->pc_name);
-- }
-- if (cmds->pc_sub_cmd) {
-- if (str) {
-- sprintf(buf, "%s %s", str, cmds->pc_name);
-- print_commands(buf, cmds->pc_sub_cmd);
-- } else {
-- print_commands(cmds->pc_name, cmds->pc_sub_cmd);
-- }
-- }
-- }
--}
--
--char *Parser_getstr(const char *prompt, const char *deft, char *res,
-- size_t len)
--{
-- char *line = NULL;
-- int size = strlen(prompt) + strlen(deft) + 8;
-- char *theprompt;
-- theprompt = malloc(size);
-- assert(theprompt);
--
-- sprintf(theprompt, "%s [%s]: ", prompt, deft);
--
-- line = readline(theprompt);
-- free(theprompt);
--
-- if ( line == NULL || *line == '\0' ) {
-- strncpy(res, deft, len);
-- } else {
-- strncpy(res, line, len);
-- }
--
-- if ( line ) {
-- free(line);
-- return res;
-- } else {
-- return NULL;
-- }
--}
--
--/* get integer from prompt, loop forever to get it */
--int Parser_getint(const char *prompt, long min, long max, long deft, int base)
--{
-- int rc;
-- long result;
-- char *line;
-- int size = strlen(prompt) + 40;
-- char *theprompt = malloc(size);
-- assert(theprompt);
-- sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft);
--
-- fflush(stdout);
--
-- do {
-- line = NULL;
-- line = readline(theprompt);
-- if ( !line ) {
-- fprintf(stdout, "Please enter an integer.\n");
-- fflush(stdout);
-- continue;
-- }
-- if ( *line == '\0' ) {
-- free(line);
-- result = deft;
-- break;
-- }
-- rc = Parser_arg2int(line, &result, base);
-- free(line);
-- if ( rc != 0 ) {
-- fprintf(stdout, "Invalid string.\n");
-- fflush(stdout);
-- } else if ( result > max || result < min ) {
-- fprintf(stdout, "Error: response must lie between %ld and %ld.\n",
-- min, max);
-- fflush(stdout);
-- } else {
-- break;
-- }
-- } while ( 1 ) ;
--
-- if (theprompt)
-- free(theprompt);
-- return result;
--
--}
--
--/* get boolean (starting with YyNn; loop forever */
--int Parser_getbool(const char *prompt, int deft)
--{
-- int result = 0;
-- char *line;
-- int size = strlen(prompt) + 8;
-- char *theprompt = malloc(size);
-- assert(theprompt);
--
-- fflush(stdout);
--
-- if ( deft != 0 && deft != 1 ) {
-- fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n",
-- deft);
-- assert ( 0 );
-- }
-- sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y");
--
-- do {
-- line = NULL;
-- line = readline(theprompt);
-- if ( line == NULL ) {
-- result = deft;
-- break;
-- }
-- if ( *line == '\0' ) {
-- result = deft;
-- break;
-- }
-- if ( *line == 'y' || *line == 'Y' ) {
-- result = 1;
-- break;
-- }
-- if ( *line == 'n' || *line == 'N' ) {
-- result = 0;
-- break;
-- }
-- if ( line )
-- free(line);
-- fprintf(stdout, "Invalid string. Must start with yY or nN\n");
-- fflush(stdout);
-- } while ( 1 );
--
-- if ( line )
-- free(line);
-- if ( theprompt )
-- free(theprompt);
-- return result;
--}
--
--/* parse int out of a string or prompt for it */
--long Parser_intarg(const char *inp, const char *prompt, int deft,
-- int min, int max, int base)
--{
-- long result;
-- int rc;
--
-- rc = Parser_arg2int(inp, &result, base);
--
-- if ( rc == 0 ) {
-- return result;
-- } else {
-- return Parser_getint(prompt, deft, min, max, base);
-- }
--}
--
--/* parse int out of a string or prompt for it */
--char *Parser_strarg(char *inp, const char *prompt, const char *deft,
-- char *answer, int len)
--{
-- if ( inp == NULL || *inp == '\0' ) {
-- return Parser_getstr(prompt, deft, answer, len);
-- } else
-- return inp;
--}
--
--/* change a string into a number: return 0 on success. No invalid characters
-- allowed. The processing of base and validity follows strtol(3)*/
--int Parser_arg2int(const char *inp, long *result, int base)
--{
-- char *endptr;
--
-- if ( (base !=0) && (base < 2 || base > 36) )
-- return 1;
--
-- *result = strtol(inp, &endptr, base);
--
-- if ( *inp != '\0' && *endptr == '\0' )
-- return 0;
-- else
-- return 1;
- }
-
- /* Convert human readable size string to and int; "1k" -> 1000 */
- int Parser_size (int *sizep, char *str) {
- int size;
- char mod[32];
-
- switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) {
- default:
- return (-1);
-
- case 1:
- *sizep = size;
- return (0);
-
- case 2:
- switch (*mod) {
- case 'g':
- case 'G':
- *sizep = size << 30;
- return (0);
-
- case 'm':
- case 'M':
- *sizep = size << 20;
- return (0);
-
- case 'k':
- case 'K':
- *sizep = size << 10;
- return (0);
-
- default:
- *sizep = size;
- return (0);
- }
- }
- }
-
- /* Convert a string boolean to an int; "enable" -> 1 */
- int Parser_bool (int *b, char *str) {
- if (!strcasecmp (str, "no") ||
- !strcasecmp (str, "n") ||
- !strcasecmp (str, "off") ||
- !strcasecmp (str, "down") ||
- !strcasecmp (str, "disable"))
- {
- *b = 0;
- return (0);
- }
-
- if (!strcasecmp (str, "yes") ||
- !strcasecmp (str, "y") ||
- !strcasecmp (str, "on") ||
- !strcasecmp (str, "up") ||
- !strcasecmp (str, "enable"))
- {
- *b = 1;
- return (0);
- }
-
- return (-1);
--}
--
--int Parser_quit(int argc, char **argv)
--{
-- argc = argc;
-- argv = argv;
-- done = 1;
-- return 0;
--}
+++ /dev/null
--#ifndef _PARSER_H_
--#define _PARSER_H_
--
--#define HISTORY 100 /* Don't let history grow unbounded */
--#define MAXARGS 512
--
--#define CMD_COMPLETE 0
--#define CMD_INCOMPLETE 1
--#define CMD_NONE 2
--#define CMD_AMBIG 3
--#define CMD_HELP 4
--
--typedef struct parser_cmd {
-- char *pc_name;
-- int (* pc_func)(int, char **);
-- struct parser_cmd * pc_sub_cmd;
-- char *pc_help;
--} command_t;
--
--typedef struct argcmd {
-- char *ac_name;
-- int (*ac_func)(int, char **);
-- char *ac_help;
--} argcmd_t;
--
--typedef struct network {
-- char *type;
-- char *server;
-- int port;
--} network_t;
--
--int Parser_quit(int argc, char **argv);
--void Parser_init(char *, command_t *); /* Set prompt and load command list */
--int Parser_commands(void); /* Start the command parser */
--void Parser_qhelp(int, char **); /* Quick help routine */
--int Parser_help(int, char **); /* Detailed help routine */
--void Parser_printhelp(char *); /* Detailed help routine */
--void Parser_exit(int, char **); /* Shuts down command parser */
--int Parser_execarg(int argc, char **argv, command_t cmds[]);
--int execute_line(char * line);
--
--/* Converts a string to an integer */
--int Parser_int(char *, int *);
--
--/* Prompts for a string, with default values and a maximum length */
--char *Parser_getstr(const char *prompt, const char *deft, char *res,
-- size_t len);
--
--/* Prompts for an integer, with minimum, maximum and default values and base */
--int Parser_getint(const char *prompt, long min, long max, long deft,
-- int base);
--
--/* Prompts for a yes/no, with default */
--int Parser_getbool(const char *prompt, int deft);
--
--/* Extracts an integer from a string, or prompts if it cannot get one */
--long Parser_intarg(const char *inp, const char *prompt, int deft,
-- int min, int max, int base);
--
--/* Extracts a word from the input, or propmts if it cannot get one */
--char *Parser_strarg(char *inp, const char *prompt, const char *deft,
-- char *answer, int len);
--
--/* Extracts an integer from a string with a base */
--int Parser_arg2int(const char *inp, long *result, int base);
-
- /* Convert human readable size string to and int; "1k" -> 1000 */
- int Parser_size(int *sizep, char *str);
-
- /* Convert a string boolean to an int; "enable" -> 1 */
- int Parser_bool(int *b, char *str);
--
--#endif
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#include <stdio.h>
--#include <sys/types.h>
--#include <netdb.h>
--#include <sys/socket.h>
--#include <netinet/tcp.h>
--#include <netdb.h>
--#include <stdlib.h>
--#include <string.h>
--#include <fcntl.h>
--#include <sys/ioctl.h>
--#include <errno.h>
--#include <unistd.h>
--#include <time.h>
--#include <stdarg.h>
--#include <endian.h>
--
--#ifdef __CYGWIN__
--
--#include <netinet/in.h>
-
- #warning assuming little endian
-
- #define __cpu_to_le64(x) ((__u64)(x))
- #define __le64_to_cpu(x) ((__u64)(x))
- #define __cpu_to_le32(x) ((__u32)(x))
- #define __le32_to_cpu(x) ((__u32)(x))
- #define __cpu_to_le16(x) ((__u16)(x))
- #define __le16_to_cpu(x) ((__u16)(x))
--
--#endif /* __CYGWIN__ */
--
--#include <portals/api-support.h>
--#include <portals/ptlctl.h>
--#include <portals/list.h>
--#include <portals/lib-types.h>
--#include <portals/socknal.h>
--#include "parser.h"
--
--unsigned int portal_debug;
--unsigned int portal_printk;
- unsigned int portal_stack;
--
--static unsigned int g_nal = 0;
--
--static int g_socket_txmem = 0;
--static int g_socket_rxmem = 0;
--static int g_socket_nonagle = 1;
--
--typedef struct
--{
-- char *name;
-- int num;
--} name2num_t;
--
--static name2num_t nalnames[] = {
-- {"any", 0},
-- {"tcp", SOCKNAL},
-- {"elan", QSWNAL},
-- {"gm", GMNAL},
- {"ib", IBNAL},
- {"scimac", SCIMACNAL},
- {"openib", OPENIBNAL},
-- {NULL, -1}
--};
--
--static cfg_record_cb_t g_record_cb;
-
-/* Convert a string boolean to an int; "enable" -> 1 */
-int ptl_parse_bool (int *b, char *str) {
- if (!strcasecmp (str, "no") ||
- !strcasecmp (str, "n") ||
- !strcasecmp (str, "off") ||
- !strcasecmp (str, "down") ||
- !strcasecmp (str, "disable"))
- {
- *b = 0;
- return (0);
- }
-
- if (!strcasecmp (str, "yes") ||
- !strcasecmp (str, "y") ||
- !strcasecmp (str, "on") ||
- !strcasecmp (str, "up") ||
- !strcasecmp (str, "enable"))
- {
- *b = 1;
- return (0);
- }
-
- return (-1);
-}
-
-/* Convert human readable size string to and int; "1k" -> 1000 */
-int ptl_parse_size (int *sizep, char *str) {
- int size;
- char mod[32];
-
- switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) {
- default:
- return (-1);
-
- case 1:
- *sizep = size;
- return (0);
-
- case 2:
- switch (*mod) {
- case 'g':
- case 'G':
- *sizep = size << 30;
- return (0);
-
- case 'm':
- case 'M':
- *sizep = size << 20;
- return (0);
-
- case 'k':
- case 'K':
- *sizep = size << 10;
- return (0);
-
- default:
- *sizep = size;
- return (0);
- }
- }
-}
--
--int
--ptl_set_cfg_record_cb(cfg_record_cb_t cb)
--{
-- g_record_cb = cb;
-- return 0;
--}
--
--int
--pcfg_ioctl(struct portals_cfg *pcfg)
--{
-- int rc;
--
-- if (pcfg->pcfg_nal ==0)
-- pcfg->pcfg_nal = g_nal;
--
-- if (g_record_cb) {
-- rc = g_record_cb(PORTALS_CFG_TYPE, sizeof(*pcfg), pcfg);
-- } else {
-- struct portal_ioctl_data data;
-- PORTAL_IOC_INIT (data);
-- data.ioc_pbuf1 = (char*)pcfg;
-- data.ioc_plen1 = sizeof(*pcfg);
-- /* XXX liblustre hack XXX */
-- data.ioc_nal_cmd = pcfg->pcfg_command;
-- data.ioc_nid = pcfg->pcfg_nid;
--
-- rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data);
-- }
--
-- return (rc);
--}
--
--
--
--static name2num_t *
--name2num_lookup_name (name2num_t *table, char *str)
--{
-- while (table->name != NULL)
-- if (!strcmp (str, table->name))
-- return (table);
-- else
-- table++;
-- return (NULL);
--}
--
--static name2num_t *
--name2num_lookup_num (name2num_t *table, int num)
--{
-- while (table->name != NULL)
-- if (num == table->num)
-- return (table);
-- else
-- table++;
-- return (NULL);
--}
--
--int
--ptl_name2nal (char *str)
--{
-- name2num_t *e = name2num_lookup_name (nalnames, str);
--
-- return ((e == NULL) ? -1 : e->num);
--}
--
--static char *
--nal2name (int nal)
--{
-- name2num_t *e = name2num_lookup_num (nalnames, nal);
--
-- return ((e == NULL) ? "???" : e->name);
--}
--
--static struct hostent *
--ptl_gethostbyname(char * hname) {
-- struct hostent *he;
-- he = gethostbyname(hname);
-- if (!he) {
-- switch(h_errno) {
-- case HOST_NOT_FOUND:
-- case NO_ADDRESS:
-- fprintf(stderr, "Unable to resolve hostname: %s\n",
-- hname);
-- break;
-- default:
-- fprintf(stderr, "gethostbyname error: %s\n",
-- strerror(errno));
-- break;
-- }
-- return NULL;
-- }
-- return he;
--}
--
--int
--ptl_parse_port (int *port, char *str)
--{
-- char *end;
--
-- *port = strtol (str, &end, 0);
--
-- if (*end == 0 && /* parsed whole string */
-- *port > 0 && *port < 65536) /* minimal sanity check */
-- return (0);
--
-- return (-1);
--}
--
--int
--ptl_parse_time (time_t *t, char *str)
--{
-- char *end;
-- int n;
-- struct tm tm;
--
-- *t = strtol (str, &end, 0);
-- if (*end == 0) /* parsed whole string */
-- return (0);
--
-- memset (&tm, 0, sizeof (tm));
-- n = sscanf (str, "%d-%d-%d-%d:%d:%d",
-- &tm.tm_year, &tm.tm_mon, &tm.tm_mday,
-- &tm.tm_hour, &tm.tm_min, &tm.tm_sec);
-- if (n != 6)
-- return (-1);
--
-- tm.tm_mon--; /* convert to 0 == Jan */
-- tm.tm_year -= 1900; /* y2k quirk */
-- tm.tm_isdst = -1; /* dunno if it's daylight savings... */
--
-- *t = mktime (&tm);
-- if (*t == (time_t)-1)
-- return (-1);
--
-- return (0);
--}
--
--int
- ptl_parse_ipaddr (__u32 *ipaddrp, char *str)
-ptl_parse_ipquad (__u32 *ipaddrp, char *str)
--{
- struct hostent *he;
-- int a;
-- int b;
-- int c;
-- int d;
-
- if (!strcmp (str, "_all_"))
- {
- *ipaddrp = 0;
- return (0);
- }
--
-- if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 &&
-- (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
-- (c & ~0xff) == 0 && (d & ~0xff) == 0)
-- {
-- *ipaddrp = (a<<24)|(b<<16)|(c<<8)|d;
- return (0);
- }
-
- return (-1);
-}
-
-int
-ptl_parse_ipaddr (__u32 *ipaddrp, char *str)
-{
- struct hostent *he;
-
- if (!strcmp (str, "_all_"))
- {
- *ipaddrp = 0;
-- return (0);
-- }
-
- if (ptl_parse_ipquad(ipaddrp, str) == 0)
- return (0);
--
-- if ((('a' <= str[0] && str[0] <= 'z') ||
-- ('A' <= str[0] && str[0] <= 'Z')) &&
-- (he = ptl_gethostbyname (str)) != NULL)
-- {
-- __u32 addr = *(__u32 *)he->h_addr;
--
-- *ipaddrp = ntohl(addr); /* HOST byte order */
-- return (0);
-- }
--
-- return (-1);
--}
--
--char *
- ptl_ipaddr_2_str (__u32 ipaddr, char *str)
-ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup)
--{
-- __u32 net_ip;
-- struct hostent *he;
-
- net_ip = htonl (ipaddr);
- he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET);
- if (he != NULL)
- return (he->h_name);
-
- if (lookup) {
- net_ip = htonl (ipaddr);
- he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET);
- if (he != NULL) {
- strcpy(str, he->h_name);
- return (str);
- }
- }
--
-- sprintf (str, "%d.%d.%d.%d",
-- (ipaddr >> 24) & 0xff, (ipaddr >> 16) & 0xff,
-- (ipaddr >> 8) & 0xff, ipaddr & 0xff);
-- return (str);
--}
--
--int
--ptl_parse_nid (ptl_nid_t *nidp, char *str)
--{
-- __u32 ipaddr;
-- char *end;
-- unsigned long long ullval;
--
-- if (!strcmp (str, "_all_")) {
-- *nidp = PTL_NID_ANY;
-- return (0);
-- }
--
-- if (ptl_parse_ipaddr (&ipaddr, str) == 0) {
-- *nidp = (ptl_nid_t)ipaddr;
-- return (0);
-- }
--
-- ullval = strtoull(str, &end, 0);
-- if (*end == 0) {
-- /* parsed whole string */
-- *nidp = (ptl_nid_t)ullval;
-- return (0);
-- }
--
-- return (-1);
-}
-
-__u64 ptl_nid2u64(ptl_nid_t nid)
-{
- switch (sizeof (nid)) {
- case 8:
- return (nid);
- case 4:
- return ((__u32)nid);
- default:
- fprintf(stderr, "Unexpected sizeof(ptl_nid_t) == %u\n", sizeof(nid));
- abort();
- /* notreached */
- return (-1);
- }
--}
--
--char *
--ptl_nid2str (char *buffer, ptl_nid_t nid)
--{
- struct hostent *he = NULL;
- __u64 nid64 = ptl_nid2u64(nid);
- struct hostent *he = 0;
--
-- /* Don't try to resolve NIDs that are e.g. Elan host IDs. Assume
-- * TCP addresses in the 0.x.x.x subnet are not in use. This can
-- * happen on routers and slows things down a _lot_. Bug 3442. */
-- if (nid & 0xff000000) {
-- __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */
- he = gethostbyaddr((const char *)&addr, sizeof(addr), AF_INET);
-
- he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET);
-- }
-
-- if (he != NULL)
- sprintf(buffer, "%#x:%s", (int)(nid >> 32), he->h_name);
- sprintf(buffer, "%#x:%s", (int)(nid64 >> 32), he->h_name);
-- else
- sprintf(buffer, LPX64, nid);
- sprintf(buffer, LPX64, nid64);
--
-- return (buffer);
--}
--
--int g_nal_is_set ()
--{
-- if (g_nal == 0) {
-- fprintf (stderr, "Error: you must run the 'network' command first.\n");
-- return (0);
-- }
--
-- return (1);
--}
--
--int g_nal_is_compatible (char *cmd, ...)
--{
-- va_list ap;
-- int nal;
--
-- if (!g_nal_is_set ())
-- return (0);
--
-- va_start (ap, cmd);
--
-- do {
-- nal = va_arg (ap, int);
-- } while (nal != 0 && nal != g_nal);
--
-- va_end (ap);
--
-- if (g_nal == nal)
-- return (1);
--
-- if (cmd != NULL) {
-- /* Don't complain verbosely if we've not been passed a command
-- * name to complain about! */
-- fprintf (stderr, "Command %s not compatible with nal %s\n",
-- cmd, nal2name (g_nal));
-- }
-- return (0);
--}
--
--int
--sock_write (int cfd, void *buffer, int nob)
--{
-- while (nob > 0)
-- {
-- int rc = write (cfd, buffer, nob);
--
-- if (rc < 0)
-- {
-- if (errno == EINTR)
-- continue;
--
-- return (rc);
-- }
--
-- if (rc == 0)
-- {
-- fprintf (stderr, "Unexpected zero sock_write\n");
-- abort();
-- }
--
-- nob -= rc;
-- buffer = (char *)buffer + nob;
-- }
--
-- return (0);
--}
--
--int
--sock_read (int cfd, void *buffer, int nob)
--{
-- while (nob > 0)
-- {
-- int rc = read (cfd, buffer, nob);
--
-- if (rc < 0)
-- {
-- if (errno == EINTR)
-- continue;
--
-- return (rc);
-- }
--
-- if (rc == 0) /* EOF */
-- {
-- errno = ECONNABORTED;
-- return (-1);
-- }
--
-- nob -= rc;
-- buffer = (char *)buffer + nob;
-- }
--
-- return (0);
--}
--
--int ptl_initialize(int argc, char **argv)
--{
-- register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
-- return 0;
--}
--
--
--int jt_ptl_network(int argc, char **argv)
--{
-- name2num_t *entry;
-- int nal;
--
-- if (argc == 2 &&
-- (nal = ptl_name2nal (argv[1])) >= 0) {
-- g_nal = nal;
-- return (0);
-- }
--
-- fprintf(stderr, "usage: %s \n", argv[0]);
-- for (entry = nalnames; entry->name != NULL; entry++)
-- fprintf (stderr, "%s%s", entry == nalnames ? "<" : "|", entry->name);
-- fprintf(stderr, ">\n");
-- return (-1);
--}
--
- int
- jt_ptl_print_autoconnects (int argc, char **argv)
-
-int
-jt_ptl_print_interfaces (int argc, char **argv)
--{
- struct portals_cfg pcfg;
- char buffer[64];
- struct portals_cfg pcfg;
- char buffer[3][64];
-- int index;
-- int rc;
--
-- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
-- return -1;
--
-- for (index = 0;;index++) {
- PCFG_INIT (pcfg, NAL_CMD_GET_AUTOCONN);
- pcfg.pcfg_count = index;
- PCFG_INIT (pcfg, NAL_CMD_GET_INTERFACE);
- pcfg.pcfg_count = index;
--
-- rc = pcfg_ioctl (&pcfg);
-- if (rc != 0)
-- break;
--
- printf (LPX64"@%s:%d #%d buffer %d "
- "nonagle %s affinity %s eager %s share %d\n",
- pcfg.pcfg_nid, ptl_ipaddr_2_str (pcfg.pcfg_id, buffer),
- pcfg.pcfg_misc, pcfg.pcfg_count, pcfg.pcfg_size,
- (pcfg.pcfg_flags & 1) ? "on" : "off",
- (pcfg.pcfg_flags & 2) ? "on" : "off",
- (pcfg.pcfg_flags & 4) ? "on" : "off",
- pcfg.pcfg_wait);
- printf ("%s: (%s/%s) npeer %d nroute %d\n",
- ptl_ipaddr_2_str(pcfg.pcfg_id, buffer[2], 1),
- ptl_ipaddr_2_str(pcfg.pcfg_id, buffer[0], 0),
- ptl_ipaddr_2_str(pcfg.pcfg_misc, buffer[1], 0),
- pcfg.pcfg_fd, pcfg.pcfg_count);
-- }
--
-- if (index == 0)
- printf ("<no autoconnect routes>\n");
- printf ("<no interfaces>\n");
-- return 0;
--}
--
- int
- jt_ptl_add_autoconnect (int argc, char **argv)
-int
-jt_ptl_add_interface (int argc, char **argv)
--{
- struct portals_cfg pcfg;
- ptl_nid_t nid;
- __u32 ip;
- int port;
- int irq_affinity = 0;
- int share = 0;
- int eager = 0;
- struct portals_cfg pcfg;
- __u32 ipaddr;
-- int rc;
- __u32 netmask = 0xffffff00;
--
- if (argc < 4 || argc > 5) {
- fprintf (stderr, "usage: %s nid ipaddr port [ise]\n", argv[0]);
- if (argc < 2 || argc > 3) {
- fprintf (stderr, "usage: %s ipaddr [netmask]\n", argv[0]);
-- return 0;
-- }
--
- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
- if (!g_nal_is_compatible(argv[0], SOCKNAL, 0))
-- return -1;
--
- if (ptl_parse_nid (&nid, argv[1]) != 0 ||
- nid == PTL_NID_ANY) {
- fprintf (stderr, "Can't parse NID: %s\n", argv[1]);
- if (ptl_parse_ipaddr(&ipaddr, argv[1]) != 0) {
- fprintf (stderr, "Can't parse ip: %s\n", argv[1]);
- return -1;
- }
-
- if (argc > 2 &&
- ptl_parse_ipquad(&netmask, argv[2]) != 0) {
- fprintf (stderr, "Can't parse netmask: %s\n", argv[2]);
-- return -1;
-- }
-
- PCFG_INIT(pcfg, NAL_CMD_ADD_INTERFACE);
- pcfg.pcfg_id = ipaddr;
- pcfg.pcfg_misc = netmask;
--
- if (ptl_parse_ipaddr (&ip, argv[2]) != 0) {
- fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]);
- rc = pcfg_ioctl (&pcfg);
- if (rc != 0) {
- fprintf (stderr, "failed to add interface: %s\n",
- strerror (errno));
-- return -1;
-- }
-
- return 0;
-}
--
- if (ptl_parse_port (&port, argv[3]) != 0) {
- fprintf (stderr, "Can't parse port: %s\n", argv[3]);
-int
-jt_ptl_del_interface (int argc, char **argv)
-{
- struct portals_cfg pcfg;
- int rc;
- __u32 ipaddr = 0;
-
- if (argc > 2) {
- fprintf (stderr, "usage: %s [ipaddr]\n", argv[0]);
- return 0;
- }
-
- if (!g_nal_is_compatible(argv[0], SOCKNAL, 0))
- return -1;
-
- if (argc == 2 &&
- ptl_parse_ipaddr(&ipaddr, argv[1]) != 0) {
- fprintf (stderr, "Can't parse ip: %s\n", argv[1]);
-- return -1;
-- }
-
- PCFG_INIT(pcfg, NAL_CMD_DEL_INTERFACE);
- pcfg.pcfg_id = ipaddr;
--
- if (argc > 4) {
- char *opts = argv[4];
-
- while (*opts != 0)
- switch (*opts++) {
- case 'i':
- irq_affinity = 1;
- break;
- case 's':
- share = 1;
- break;
- case 'e':
- eager = 1;
- break;
- default:
- fprintf (stderr, "Can't parse options: %s\n",
- argv[4]);
- return -1;
- }
- rc = pcfg_ioctl (&pcfg);
- if (rc != 0) {
- fprintf (stderr, "failed to delete interface: %s\n",
- strerror (errno));
- return -1;
-- }
-
- return 0;
-}
--
- PCFG_INIT(pcfg, NAL_CMD_ADD_AUTOCONN);
-int
-jt_ptl_print_peers (int argc, char **argv)
-{
- struct portals_cfg pcfg;
- char buffer[2][64];
- int index;
- int rc;
-
- if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0))
- return -1;
-
- for (index = 0;;index++) {
- PCFG_INIT (pcfg, NAL_CMD_GET_PEER);
- pcfg.pcfg_count = index;
-
- rc = pcfg_ioctl (&pcfg);
- if (rc != 0)
- break;
-
- if (g_nal_is_compatible(NULL, SOCKNAL, 0))
- printf (LPX64"[%d]%s@%s:%d #%d\n",
- pcfg.pcfg_nid, pcfg.pcfg_wait,
- ptl_ipaddr_2_str (pcfg.pcfg_size, buffer[0], 1),
- ptl_ipaddr_2_str (pcfg.pcfg_id, buffer[1], 1),
- pcfg.pcfg_misc, pcfg.pcfg_count);
- else
- printf (LPX64"[%d]\n",
- pcfg.pcfg_nid, pcfg.pcfg_wait);
- }
-
- if (index == 0)
- printf ("<no peers>\n");
- return 0;
-}
-
-int
-jt_ptl_add_peer (int argc, char **argv)
-{
- struct portals_cfg pcfg;
- ptl_nid_t nid;
- __u32 ip = 0;
- int port = 0;
- int rc;
-
- if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0))
- return -1;
-
- if (g_nal_is_compatible(NULL, SOCKNAL, 0)) {
- if (argc != 4) {
- fprintf (stderr, "usage(tcp): %s nid ipaddr port\n",
- argv[0]);
- return 0;
- }
- } else if (argc != 2) {
- fprintf (stderr, "usage(openib): %s nid\n", argv[0]);
- return 0;
- }
-
- if (ptl_parse_nid (&nid, argv[1]) != 0 ||
- nid == PTL_NID_ANY) {
- fprintf (stderr, "Can't parse NID: %s\n", argv[1]);
- return -1;
- }
-
- if (g_nal_is_compatible (NULL, SOCKNAL, 0)) {
- if (ptl_parse_ipaddr (&ip, argv[2]) != 0) {
- fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]);
- return -1;
- }
-
- if (ptl_parse_port (&port, argv[3]) != 0) {
- fprintf (stderr, "Can't parse port: %s\n", argv[3]);
- return -1;
- }
- }
-
- PCFG_INIT(pcfg, NAL_CMD_ADD_PEER);
-- pcfg.pcfg_nid = nid;
-- pcfg.pcfg_id = ip;
-- pcfg.pcfg_misc = port;
- /* only passing one buffer size! */
- pcfg.pcfg_size = MAX (g_socket_rxmem, g_socket_txmem);
- pcfg.pcfg_flags = (g_socket_nonagle ? 0x01 : 0) |
- (irq_affinity ? 0x02 : 0) |
- (share ? 0x04 : 0) |
- (eager ? 0x08 : 0);
--
-- rc = pcfg_ioctl (&pcfg);
-- if (rc != 0) {
- fprintf (stderr, "failed to enable autoconnect: %s\n",
- fprintf (stderr, "failed to add peer: %s\n",
-- strerror (errno));
-- return -1;
-- }
--
-- return 0;
--}
--
--int
- jt_ptl_del_autoconnect (int argc, char **argv)
-jt_ptl_del_peer (int argc, char **argv)
--{
-- struct portals_cfg pcfg;
-- ptl_nid_t nid = PTL_NID_ANY;
- __u32 ip = 0;
- int share = 0;
- int keep_conn = 0;
- __u32 ip = 0;
- int single_share = 0;
- int argidx;
-- int rc;
-
- if (argc > 4) {
- fprintf (stderr, "usage: %s [nid] [ipaddr] [sk]\n",
- argv[0]);
- return 0;
- }
--
- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
- if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0))
-- return -1;
--
- if (g_nal_is_compatible(NULL, SOCKNAL, 0)) {
- if (argc > 4) {
- fprintf (stderr, "usage: %s [nid] [ipaddr] [single_share]\n",
- argv[0]);
- return 0;
- }
- } else if (argc > 3) {
- fprintf (stderr, "usage: %s [nid] [single_share]\n", argv[0]);
- return 0;
- }
-
-- if (argc > 1 &&
-- ptl_parse_nid (&nid, argv[1]) != 0) {
-- fprintf (stderr, "Can't parse nid: %s\n", argv[1]);
-- return -1;
-- }
--
- if (argc > 2 &&
- ptl_parse_ipaddr (&ip, argv[2]) != 0) {
- fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]);
- return -1;
- argidx = 2;
- if (g_nal_is_compatible(NULL, SOCKNAL, 0)) {
- if (argc > argidx &&
- ptl_parse_ipaddr (&ip, argv[argidx]) != 0) {
- fprintf (stderr, "Can't parse ip addr: %s\n",
- argv[argidx]);
- return -1;
- }
- argidx++;
-- }
-
- if (argc > 3) {
- char *opts = argv[3];
-
- while (*opts != 0)
- switch (*opts++) {
- case 's':
- share = 1;
- break;
- case 'k':
- keep_conn = 1;
- break;
- default:
- fprintf (stderr, "Can't parse flags: %s\n",
- argv[3]);
- return -1;
- }
-
- if (argc > argidx) {
- if (!strcmp (argv[3], "single_share")) {
- single_share = 1;
- } else {
- fprintf (stderr, "Unrecognised arg %s'\n", argv[3]);
- return -1;
- }
-- }
--
- PCFG_INIT(pcfg, NAL_CMD_DEL_AUTOCONN);
- pcfg.pcfg_nid = nid;
- pcfg.pcfg_id = ip;
- pcfg.pcfg_flags = (share ? 1 : 0) |
- (keep_conn ? 2 : 0);
- PCFG_INIT(pcfg, NAL_CMD_DEL_PEER);
- pcfg.pcfg_nid = nid;
- pcfg.pcfg_id = ip;
- pcfg.pcfg_flags = single_share;
--
-- rc = pcfg_ioctl (&pcfg);
-- if (rc != 0) {
- fprintf (stderr, "failed to remove autoconnect route: %s\n",
- fprintf (stderr, "failed to remove peer: %s\n",
-- strerror (errno));
-- return -1;
-- }
--
-- return 0;
--}
--
--int
--jt_ptl_print_connections (int argc, char **argv)
--{
-- struct portals_cfg pcfg;
- char buffer[64];
- char buffer[2][64];
-- int index;
-- int rc;
--
- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
- if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, 0))
-- return -1;
--
-- for (index = 0;;index++) {
-- PCFG_INIT (pcfg, NAL_CMD_GET_CONN);
-- pcfg.pcfg_count = index;
--
-- rc = pcfg_ioctl (&pcfg);
-- if (rc != 0)
-- break;
--
- printf (LPX64"@%s:%d:%s\n",
- pcfg.pcfg_nid,
- ptl_ipaddr_2_str (pcfg.pcfg_id, buffer),
- pcfg.pcfg_misc,
- (pcfg.pcfg_flags == SOCKNAL_CONN_ANY) ? "A" :
- (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL) ? "C" :
- (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN) ? "I" :
- (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT) ? "O" : "?");
- if (g_nal_is_compatible (NULL, SOCKNAL, 0))
- printf ("[%d]%s:"LPX64"@%s:%d:%s %d/%d %s\n",
- pcfg.pcfg_gw_nal, /* scheduler */
- ptl_ipaddr_2_str (pcfg.pcfg_fd, buffer[0], 1), /* local IP addr */
- pcfg.pcfg_nid,
- ptl_ipaddr_2_str (pcfg.pcfg_id, buffer[1], 1), /* remote IP addr */
- pcfg.pcfg_misc, /* remote port */
- (pcfg.pcfg_flags == SOCKNAL_CONN_ANY) ? "A" :
- (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL) ? "C" :
- (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN) ? "I" :
- (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT) ? "O" : "?",
- pcfg.pcfg_count, /* tx buffer size */
- pcfg.pcfg_size, /* rx buffer size */
- pcfg.pcfg_wait ? "nagle" : "nonagle");
- else
- printf (LPX64"\n",
- pcfg.pcfg_nid);
-- }
--
-- if (index == 0)
-- printf ("<no connections>\n");
-- return 0;
--}
--
--int jt_ptl_connect(int argc, char **argv)
--{
-- struct portals_cfg pcfg;
-- struct sockaddr_in srvaddr;
- struct sockaddr_in locaddr;
-- __u32 ipaddr;
-- char *flag;
-- int fd, rc;
- int nonagle = 0;
- int rxmem = 0;
- int txmem = 0;
- int bind_irq = 0;
-- int type = SOCKNAL_CONN_ANY;
- int port, rport;
- int o;
- int olen;
- int port;
--
-- if (argc < 3) {
- fprintf(stderr, "usage: %s ip port [xibctr]\n", argv[0]);
- fprintf(stderr, "usage: %s ip port [type]\n", argv[0]);
-- return 0;
-- }
--
-- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
-- return -1;
--
-- rc = ptl_parse_ipaddr (&ipaddr, argv[1]);
-- if (rc != 0) {
-- fprintf(stderr, "Can't parse hostname: %s\n", argv[1]);
-- return -1;
-- }
--
-- if (ptl_parse_port (&port, argv[2]) != 0) {
-- fprintf (stderr, "Can't parse port: %s\n", argv[2]);
-- return -1;
-- }
--
-- if (argc > 3)
-- for (flag = argv[3]; *flag != 0; flag++)
-- switch (*flag)
-- {
- case 'i':
- bind_irq = 1;
- break;
-
-- case 'I':
-- if (type != SOCKNAL_CONN_ANY) {
-- fprintf(stderr, "Can't flag type twice\n");
-- return -1;
-- }
-- type = SOCKNAL_CONN_BULK_IN;
-- break;
--
-- case 'O':
-- if (type != SOCKNAL_CONN_ANY) {
-- fprintf(stderr, "Can't flag type twice\n");
-- return -1;
-- }
-- type = SOCKNAL_CONN_BULK_OUT;
-- break;
--
-- case 'C':
-- if (type != SOCKNAL_CONN_ANY) {
-- fprintf(stderr, "Can't flag type twice\n");
-- return -1;
-- }
-- type = SOCKNAL_CONN_CONTROL;
-- break;
--
-- default:
-- fprintf (stderr, "unrecognised flag '%c'\n",
-- *flag);
-- return (-1);
-- }
-
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_addr.s_addr = INADDR_ANY;
--
-- memset(&srvaddr, 0, sizeof(srvaddr));
-- srvaddr.sin_family = AF_INET;
-- srvaddr.sin_port = htons(port);
-- srvaddr.sin_addr.s_addr = htonl(ipaddr);
-
- for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
- fd = socket(PF_INET, SOCK_STREAM, 0);
- if ( fd < 0 ) {
- fprintf(stderr, "socket() failed: %s\n", strerror(errno));
- return -1;
- }
-
- o = 1;
- rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
- &o, sizeof(o));
--
- if (g_socket_nonagle) {
- o = 1;
- rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o));
- if (rc != 0) {
- fprintf(stderr, "cannot disable nagle: %s\n",
- strerror(errno));
- return (-1);
- }
- }
-
- if (g_socket_rxmem != 0) {
- o = g_socket_rxmem;
- rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &o, sizeof (o));
- if (rc != 0) {
- fprintf(stderr, "cannot set receive buffer size: %s\n",
- strerror(errno));
- return (-1);
- }
- }
-
- if (g_socket_txmem != 0) {
- o = g_socket_txmem;
- rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &o, sizeof (o));
- if (rc != 0) {
- fprintf(stderr, "cannot set send buffer size: %s\n", strerror(errno));
- return (-1);
- }
- }
-
- locaddr.sin_port = htons(rport);
- rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc == 0 || errno == EACCES) {
- rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
- if (rc == 0) {
- break;
- } else if (errno != EADDRINUSE) {
- fprintf(stderr, "Error connecting to host: %s\n", strerror(errno));
- close(fd);
- return -1;
- }
- } else if (errno != EADDRINUSE) {
- fprintf(stderr, "Error binding to port %d: %d: %s\n", port, errno, strerror(errno));
- close(fd);
- return -1;
- }
- fd = socket(PF_INET, SOCK_STREAM, 0);
- if ( fd < 0 ) {
- fprintf(stderr, "socket() failed: %s\n", strerror(errno));
- return -1;
-- }
--
- if (rport == IPPORT_RESERVED / 2) {
- fprintf(stderr,
- "Warning: all privileged ports are in use.\n");
- rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
- if ( rc == -1 ) {
- fprintf(stderr, "connect() failed: %s\n", strerror(errno));
-- return -1;
-- }
-
- olen = sizeof (txmem);
- if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &olen) != 0)
- fprintf (stderr, "Can't get send buffer size: %s\n", strerror (errno));
- olen = sizeof (rxmem);
- if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &olen) != 0)
- fprintf (stderr, "Can't get receive buffer size: %s\n", strerror (errno));
- olen = sizeof (nonagle);
- if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0)
- fprintf (stderr, "Can't get nagle: %s\n", strerror (errno));
--
- printf("Connected host: %s snd: %d rcv: %d nagle: %s type: %s\n",
- argv[1], txmem, rxmem, nonagle ? "Disabled" : "Enabled",
- printf("Connected host: %s type: %s\n",
- argv[1],
-- (type == SOCKNAL_CONN_ANY) ? "A" :
-- (type == SOCKNAL_CONN_CONTROL) ? "C" :
-- (type == SOCKNAL_CONN_BULK_IN) ? "I" :
-- (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?");
--
-- PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD);
-- pcfg.pcfg_nal = g_nal;
-- pcfg.pcfg_fd = fd;
- pcfg.pcfg_flags = bind_irq;
-- pcfg.pcfg_misc = type;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc) {
-- fprintf(stderr, "failed to register fd with portals: %s\n",
-- strerror(errno));
-- close (fd);
-- return -1;
-- }
--
-- printf("Connection to %s registered with socknal\n", argv[1]);
--
-- rc = close(fd);
-- if (rc)
-- fprintf(stderr, "close failed: %d\n", rc);
--
-- return 0;
--}
--
--int jt_ptl_disconnect(int argc, char **argv)
--{
- struct portals_cfg pcfg;
- struct portals_cfg pcfg;
-- ptl_nid_t nid = PTL_NID_ANY;
-- __u32 ipaddr = 0;
-- int rc;
--
-- if (argc > 3) {
-- fprintf(stderr, "usage: %s [nid] [ipaddr]\n", argv[0]);
-- return 0;
-- }
--
- if (!g_nal_is_compatible (NULL, SOCKNAL, 0))
- if (!g_nal_is_compatible (NULL, SOCKNAL, OPENIBNAL, 0))
-- return 0;
--
-- if (argc >= 2 &&
-- ptl_parse_nid (&nid, argv[1]) != 0) {
-- fprintf (stderr, "Can't parse nid %s\n", argv[1]);
-- return -1;
-- }
--
- if (argc >= 3 &&
- if (g_nal_is_compatible (NULL, SOCKNAL, 0) &&
- argc >= 3 &&
-- ptl_parse_ipaddr (&ipaddr, argv[2]) != 0) {
-- fprintf (stderr, "Can't parse ip addr %s\n", argv[2]);
-- return -1;
-- }
--
-- PCFG_INIT(pcfg, NAL_CMD_CLOSE_CONNECTION);
-- pcfg.pcfg_nid = nid;
-- pcfg.pcfg_id = ipaddr;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc) {
-- fprintf(stderr, "failed to remove connection: %s\n",
-- strerror(errno));
-- return -1;
-- }
--
-- return 0;
--}
--
--int jt_ptl_push_connection (int argc, char **argv)
--{
- struct portals_cfg pcfg;
- struct portals_cfg pcfg;
-- int rc;
-- ptl_nid_t nid = PTL_NID_ANY;
-- __u32 ipaddr = 0;
--
-- if (argc > 3) {
-- fprintf(stderr, "usage: %s [nid] [ip]\n", argv[0]);
-- return 0;
-- }
--
-- if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
-- return -1;
--
-- if (argc > 1 &&
-- ptl_parse_nid (&nid, argv[1]) != 0) {
-- fprintf(stderr, "Can't parse nid: %s\n", argv[1]);
-- return -1;
-- }
--
-- if (argc > 2 &&
-- ptl_parse_ipaddr (&ipaddr, argv[2]) != 0) {
-- fprintf(stderr, "Can't parse ipaddr: %s\n", argv[2]);
-- }
--
-- PCFG_INIT(pcfg, NAL_CMD_PUSH_CONNECTION);
-- pcfg.pcfg_nid = nid;
-- pcfg.pcfg_id = ipaddr;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc) {
-- fprintf(stderr, "failed to push connection: %s\n",
-- strerror(errno));
-- return -1;
-- }
--
-- return 0;
--}
--
--int
--jt_ptl_print_active_txs (int argc, char **argv)
--{
- struct portals_cfg pcfg;
- struct portals_cfg pcfg;
-- int index;
-- int rc;
--
-- if (!g_nal_is_compatible (argv[0], QSWNAL, 0))
-- return -1;
--
-- for (index = 0;;index++) {
-- PCFG_INIT(pcfg, NAL_CMD_GET_TXDESC);
-- pcfg.pcfg_count = index;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc != 0)
-- break;
--
-- printf ("%p: %5s payload %6d bytes to "LPX64" via "LPX64" by pid %6d: %s, %s, state %d\n",
-- pcfg.pcfg_pbuf1,
-- pcfg.pcfg_count == PTL_MSG_ACK ? "ACK" :
-- pcfg.pcfg_count == PTL_MSG_PUT ? "PUT" :
-- pcfg.pcfg_count == PTL_MSG_GET ? "GET" :
-- pcfg.pcfg_count == PTL_MSG_REPLY ? "REPLY" : "<wierd message>",
-- pcfg.pcfg_size,
-- pcfg.pcfg_nid,
-- pcfg.pcfg_nid2,
-- pcfg.pcfg_misc,
-- (pcfg.pcfg_flags & 1) ? "delayed" : "immediate",
-- (pcfg.pcfg_flags & 2) ? "nblk" : "normal",
-- pcfg.pcfg_flags >> 2);
-- }
--
-- if (index == 0)
-- printf ("<no active descs>\n");
-- return 0;
--}
--
--int jt_ptl_ping(int argc, char **argv)
--{
-- int rc;
-- ptl_nid_t nid;
-- long count = 1;
-- long size = 4;
-- long timeout = 1;
-- struct portal_ioctl_data data;
--
-- if (argc < 2) {
-- fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]);
-- return 0;
-- }
--
-- if (!g_nal_is_set())
-- return -1;
--
-- if (ptl_parse_nid (&nid, argv[1]) != 0)
-- {
-- fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
-- return (-1);
-- }
--
-- if (argc > 2)
-- {
-- count = atol(argv[2]);
--
-- if (count < 0 || count > 20000)
-- {
-- fprintf(stderr, "are you insane? %ld is a crazy count.\n", count);
-- return -1;
-- }
-- }
--
-- if (argc > 3)
-- size= atol(argv[3]);
--
-- if (argc > 4)
-- timeout = atol (argv[4]);
--
-- PORTAL_IOC_INIT (data);
-- data.ioc_count = count;
-- data.ioc_size = size;
-- data.ioc_nid = nid;
-- data.ioc_nal = g_nal;
-- data.ioc_timeout = timeout;
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data);
-- if (rc) {
-- fprintf(stderr, "failed to start pinger: %s\n",
-- strerror(errno));
-- return -1;
-- }
-- return 0;
--}
--
--int jt_ptl_shownid(int argc, char **argv)
--{
-- struct portal_ioctl_data data;
-- int rc;
--
-- if (argc > 1) {
-- fprintf(stderr, "usage: %s\n", argv[0]);
-- return 0;
-- }
--
-- if (!g_nal_is_set())
-- return -1;
--
-- PORTAL_IOC_INIT (data);
-- data.ioc_nal = g_nal;
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data);
-- if (rc < 0)
-- fprintf(stderr, "getting my NID failed: %s\n",
-- strerror (errno));
-- else
-- printf(LPX64"\n", data.ioc_nid);
-- return 0;
--}
--
--int jt_ptl_mynid(int argc, char **argv)
--{
-- int rc;
-- char hostname[1024];
-- char *nidstr;
-- struct portals_cfg pcfg;
-- ptl_nid_t mynid;
-
-
-- if (argc > 2) {
-- fprintf(stderr, "usage: %s [NID]\n", argv[0]);
-- fprintf(stderr, "NID defaults to the primary IP address of the machine.\n");
-- return 0;
-- }
--
-- if (!g_nal_is_set())
-- return -1;
--
-- if (argc >= 2)
-- nidstr = argv[1];
-- else if (gethostname(hostname, sizeof(hostname)) != 0) {
-- fprintf(stderr, "gethostname failed: %s\n",
-- strerror(errno));
-- return -1;
-- }
-- else
-- nidstr = hostname;
--
-- rc = ptl_parse_nid (&mynid, nidstr);
-- if (rc != 0) {
-- fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr);
-- return -1;
-- }
--
-- PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID);
-- pcfg.pcfg_nid = mynid;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc < 0)
-- fprintf(stderr, "setting my NID failed: %s\n",
-- strerror(errno));
-- else
- printf("registered my nid "LPX64" (%s)\n", mynid, hostname);
- printf("registered my nid "LPX64" (%s)\n",
- ptl_nid2u64(mynid), hostname);
-- return 0;
--}
--
--int
--jt_ptl_fail_nid (int argc, char **argv)
--{
-- int rc;
-- ptl_nid_t nid;
-- unsigned int threshold;
-- struct portal_ioctl_data data;
--
-- if (argc < 2 || argc > 3)
-- {
-- fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]);
-- return (0);
-- }
--
-- if (!g_nal_is_set())
-- return (-1);
--
-- if (!strcmp (argv[1], "_all_"))
-- nid = PTL_NID_ANY;
-- else if (ptl_parse_nid (&nid, argv[1]) != 0)
-- {
-- fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
-- return (-1);
-- }
--
-- if (argc < 3)
-- threshold = PTL_MD_THRESH_INF;
-- else if (sscanf (argv[2], "%i", &threshold) != 1) {
-- fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]);
-- return (-1);
-- }
--
-- PORTAL_IOC_INIT (data);
-- data.ioc_nal = g_nal;
-- data.ioc_nid = nid;
-- data.ioc_count = threshold;
--
-- rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data);
-- if (rc < 0)
-- fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n",
-- strerror (errno));
-- else
-- printf ("%s %s\n", threshold == 0 ? "Unfailing" : "Failing", argv[1]);
-
- return (0);
- }
-
- int
- jt_ptl_rxmem (int argc, char **argv)
- {
- int size;
-
- if (argc > 1)
- {
- if (Parser_size (&size, argv[1]) != 0 || size < 0)
- {
- fprintf (stderr, "Can't parse size %s\n", argv[1]);
- return (0);
- }
-
- g_socket_rxmem = size;
- }
- printf ("Socket rmem = %d\n", g_socket_rxmem);
- return (0);
- }
-
- int
- jt_ptl_txmem (int argc, char **argv)
- {
- int size;
--
- if (argc > 1)
- {
- if (Parser_size (&size, argv[1]) != 0 || size < 0)
- {
- fprintf (stderr, "Can't parse size %s\n", argv[1]);
- return (0);
- }
- g_socket_txmem = size;
- }
- printf ("Socket txmem = %d\n", g_socket_txmem);
- return (0);
- }
-
- int
- jt_ptl_nagle (int argc, char **argv)
- {
- int enable;
-
- if (argc > 1)
- {
- if (Parser_bool (&enable, argv[1]) != 0)
- {
- fprintf (stderr, "Can't parse boolean %s\n", argv[1]);
- return (-1);
- }
- g_socket_nonagle = !enable;
- }
- printf ("Nagle %s\n", g_socket_nonagle ? "disabled" : "enabled");
-- return (0);
--}
--
--int
--jt_ptl_add_route (int argc, char **argv)
--{
-- struct portals_cfg pcfg;
-- ptl_nid_t nid1;
-- ptl_nid_t nid2;
-- ptl_nid_t gateway_nid;
-- int rc;
--
-- if (argc < 3)
-- {
-- fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]);
-- return (0);
-- }
--
-- if (!g_nal_is_set())
-- return (-1);
--
-- if (ptl_parse_nid (&gateway_nid, argv[1]) != 0)
-- {
-- fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
-- return (-1);
-- }
--
-- if (ptl_parse_nid (&nid1, argv[2]) != 0)
-- {
-- fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]);
-- return (-1);
-- }
--
-- if (argc < 4)
-- nid2 = nid1;
-- else if (ptl_parse_nid (&nid2, argv[3]) != 0)
-- {
-- fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[4]);
-- return (-1);
-- }
--
-- PCFG_INIT(pcfg, NAL_CMD_ADD_ROUTE);
-- pcfg.pcfg_nid = gateway_nid;
-- pcfg.pcfg_nal = ROUTER;
-- pcfg.pcfg_gw_nal = g_nal;
-- pcfg.pcfg_nid2 = MIN (nid1, nid2);
-- pcfg.pcfg_nid3 = MAX (nid1, nid2);
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc != 0)
-- {
-- fprintf (stderr, "NAL_CMD_ADD_ROUTE failed: %s\n", strerror (errno));
-- return (-1);
-- }
--
-- return (0);
--}
--
--int
--jt_ptl_del_route (int argc, char **argv)
--{
-- struct portals_cfg pcfg;
-- ptl_nid_t nid;
-- ptl_nid_t nid1 = PTL_NID_ANY;
-- ptl_nid_t nid2 = PTL_NID_ANY;
-- int rc;
--
-- if (argc < 2)
-- {
-- fprintf (stderr, "usage: %s targetNID\n", argv[0]);
-- return (0);
-- }
--
-- if (!g_nal_is_set())
-- return (-1);
--
-- if (ptl_parse_nid (&nid, argv[1]) != 0)
-- {
-- fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
-- return (-1);
-- }
--
-- if (argc >= 3 &&
-- ptl_parse_nid (&nid1, argv[2]) != 0)
-- {
-- fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[2]);
-- return (-1);
-- }
--
-- if (argc < 4) {
-- nid2 = nid1;
-- } else {
-- if (ptl_parse_nid (&nid2, argv[3]) != 0) {
-- fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[3]);
-- return (-1);
-- }
--
-- if (nid1 > nid2) {
-- ptl_nid_t tmp = nid1;
--
-- nid1 = nid2;
-- nid2 = tmp;
-- }
-- }
--
-- PCFG_INIT(pcfg, NAL_CMD_DEL_ROUTE);
-- pcfg.pcfg_nal = ROUTER;
-- pcfg.pcfg_gw_nal = g_nal;
-- pcfg.pcfg_nid = nid;
-- pcfg.pcfg_nid2 = nid1;
-- pcfg.pcfg_nid3 = nid2;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc != 0)
-- {
- fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n", nid, strerror (errno));
- fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n",
- ptl_nid2u64(nid), strerror (errno));
-- return (-1);
-- }
--
-- return (0);
--}
--
--int
--jt_ptl_notify_router (int argc, char **argv)
--{
-- struct portals_cfg pcfg;
-- int enable;
-- ptl_nid_t nid;
-- int rc;
-- struct timeval now;
-- time_t when;
--
-- if (argc < 3)
-- {
-- fprintf (stderr, "usage: %s targetNID <up/down> [<time>]\n",
-- argv[0]);
-- return (0);
-- }
--
-- if (ptl_parse_nid (&nid, argv[1]) != 0)
-- {
-- fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]);
-- return (-1);
-- }
--
- if (Parser_bool (&enable, argv[2]) != 0) {
- if (ptl_parse_bool (&enable, argv[2]) != 0) {
-- fprintf (stderr, "Can't parse boolean %s\n", argv[2]);
-- return (-1);
-- }
--
-- gettimeofday(&now, NULL);
--
-- if (argc < 4) {
-- when = now.tv_sec;
-- } else if (ptl_parse_time (&when, argv[3]) != 0) {
-- fprintf(stderr, "Can't parse time %s\n"
-- "Please specify either 'YYYY-MM-DD-HH:MM:SS'\n"
-- "or an absolute unix time in seconds\n", argv[3]);
-- return (-1);
-- } else if (when > now.tv_sec) {
-- fprintf (stderr, "%s specifies a time in the future\n",
-- argv[3]);
-- return (-1);
-- }
--
-- PCFG_INIT(pcfg, NAL_CMD_NOTIFY_ROUTER);
-- pcfg.pcfg_nal = ROUTER;
-- pcfg.pcfg_gw_nal = g_nal;
-- pcfg.pcfg_nid = nid;
-- pcfg.pcfg_flags = enable;
-- /* Yeuch; 'cept I need a __u64 on 64 bit machines... */
-- pcfg.pcfg_nid3 = (__u64)when;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc != 0)
-- {
-- fprintf (stderr, "NAL_CMD_NOTIFY_ROUTER ("LPX64") failed: %s\n",
- nid, strerror (errno));
- ptl_nid2u64(nid), strerror (errno));
-- return (-1);
-- }
--
-- return (0);
--}
--
--int
--jt_ptl_print_routes (int argc, char **argv)
--{
-- char buffer[3][128];
-- struct portals_cfg pcfg;
-- int rc;
-- int index;
-- int gateway_nal;
-- ptl_nid_t gateway_nid;
-- ptl_nid_t nid1;
-- ptl_nid_t nid2;
-- int alive;
--
-- for (index = 0;;index++)
-- {
-- PCFG_INIT(pcfg, NAL_CMD_GET_ROUTE);
-- pcfg.pcfg_nal = ROUTER;
-- pcfg.pcfg_count = index;
--
-- rc = pcfg_ioctl(&pcfg);
-- if (rc != 0)
-- break;
--
-- gateway_nal = pcfg.pcfg_gw_nal;
-- gateway_nid = pcfg.pcfg_nid;
-- nid1 = pcfg.pcfg_nid2;
-- nid2 = pcfg.pcfg_nid3;
-- alive = pcfg.pcfg_flags;
--
-- printf ("%8s %18s : %s - %s, %s\n",
-- nal2name (gateway_nal),
-- ptl_nid2str (buffer[0], gateway_nid),
-- ptl_nid2str (buffer[1], nid1),
-- ptl_nid2str (buffer[2], nid2),
-- alive ? "up" : "down");
-- }
-- return (0);
--}
--
--static int
--lwt_control(int enable, int clear)
--{
-- struct portal_ioctl_data data;
-- int rc;
--
-- PORTAL_IOC_INIT(data);
-- data.ioc_flags = enable;
-- data.ioc_misc = clear;
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_CONTROL, &data);
-- if (rc == 0)
-- return (0);
--
-- fprintf(stderr, "IOC_PORTAL_LWT_CONTROL failed: %s\n",
-- strerror(errno));
-- return (-1);
--}
--
--static int
--lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize,
-- lwt_event_t *events, int size)
--{
-- struct portal_ioctl_data data;
-- int rc;
--
-- PORTAL_IOC_INIT(data);
-- data.ioc_pbuf1 = (char *)events;
-- data.ioc_plen1 = size;
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_SNAPSHOT, &data);
-- if (rc != 0) {
-- fprintf(stderr, "IOC_PORTAL_LWT_SNAPSHOT failed: %s\n",
-- strerror(errno));
- return (-1);
- }
-
- /* crappy overloads */
- if (data.ioc_nid != sizeof(lwt_event_t) ||
- data.ioc_nid2 != offsetof(lwt_event_t, lwte_where)) {
- fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n",
- (int)data.ioc_nid, sizeof(lwt_event_t),
- (int)data.ioc_nid2,
- (int)offsetof(lwt_event_t, lwte_where));
-- return (-1);
-- }
--
-- LASSERT (data.ioc_count != 0);
-- LASSERT (data.ioc_misc != 0);
-
-
-- if (now != NULL)
-- *now = data.ioc_nid;
--
-- if (ncpu != NULL)
-- *ncpu = data.ioc_count;
--
-- if (totalsize != NULL)
-- *totalsize = data.ioc_misc;
--
-- return (0);
--}
--
--static char *
--lwt_get_string(char *kstr)
--{
-- char *ustr;
-- struct portal_ioctl_data data;
-- int size;
-- int rc;
--
-- /* FIXME: this could maintain a symbol table since we expect to be
-- * looking up the same strings all the time... */
--
-- PORTAL_IOC_INIT(data);
-- data.ioc_pbuf1 = kstr;
-- data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */
-- data.ioc_pbuf2 = NULL;
-- data.ioc_plen2 = 0;
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_LOOKUP_STRING, &data);
-- if (rc != 0) {
-- fprintf(stderr, "IOC_PORTAL_LWT_LOOKUP_STRING failed: %s\n",
-- strerror(errno));
-- return (NULL);
-- }
--
-- size = data.ioc_count;
-- ustr = (char *)malloc(size);
-- if (ustr == NULL) {
-- fprintf(stderr, "Can't allocate string storage of size %d\n",
-- size);
-- return (NULL);
-- }
--
-- PORTAL_IOC_INIT(data);
-- data.ioc_pbuf1 = kstr;
-- data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */
-- data.ioc_pbuf2 = ustr;
-- data.ioc_plen2 = size;
--
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_LOOKUP_STRING, &data);
-- if (rc != 0) {
-- fprintf(stderr, "IOC_PORTAL_LWT_LOOKUP_STRING failed: %s\n",
-- strerror(errno));
-- return (NULL);
-- }
--
-- LASSERT(strlen(ustr) == size - 1);
-- return (ustr);
--}
--
--static void
--lwt_put_string(char *ustr)
--{
-- free(ustr);
--}
--
--static int
--lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e)
--{
- char whenstr[32];
-- char *where = lwt_get_string(e->lwte_where);
--
-- if (where == NULL)
-- return (-1);
-
- sprintf(whenstr, LPD64, e->lwte_when - t0);
--
-- fprintf(f, "%#010lx %#010lx %#010lx %#010lx: %#010lx %1d %10.6f %10.2f %s\n",
-- e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4,
-- (long)e->lwte_task, cpu, (e->lwte_when - t0) / (mhz * 1000000.0),
-- (t0 == e->lwte_when) ? 0.0 : (e->lwte_when - tlast) / mhz,
-- where);
--
-- lwt_put_string(where);
--
-- return (0);
--}
--
--double
--get_cycles_per_usec ()
--{
-- FILE *f = fopen ("/proc/cpuinfo", "r");
-- double mhz;
-- char line[64];
--
-- if (f != NULL) {
-- while (fgets (line, sizeof (line), f) != NULL)
-- if (sscanf (line, "cpu MHz : %lf", &mhz) == 1) {
-- fclose (f);
-- return (mhz);
-- }
-- fclose (f);
-- }
--
-- fprintf (stderr, "Can't read/parse /proc/cpuinfo\n");
-- return (1000.0);
--}
--
--int
--jt_ptl_lwt(int argc, char **argv)
--{
- const int lwt_max_cpus = 32;
-- int ncpus;
-- int totalspace;
-- int nevents_per_cpu;
-- lwt_event_t *events;
- lwt_event_t *cpu_event[LWT_MAX_CPUS + 1];
- lwt_event_t *next_event[LWT_MAX_CPUS];
- lwt_event_t *first_event[LWT_MAX_CPUS];
- lwt_event_t *cpu_event[lwt_max_cpus + 1];
- lwt_event_t *next_event[lwt_max_cpus];
- lwt_event_t *first_event[lwt_max_cpus];
-- int cpu;
-- lwt_event_t *e;
-- int rc;
-- int i;
-- double mhz;
-- cycles_t t0;
-- cycles_t tlast;
-- cycles_t tnow;
-- struct timeval tvnow;
-- int printed_date = 0;
- int nlines = 0;
-- FILE *f = stdout;
--
-- if (argc < 2 ||
-- (strcmp(argv[1], "start") &&
-- strcmp(argv[1], "stop"))) {
-- fprintf(stderr,
-- "usage: %s start\n"
-- " %s stop [fname]\n", argv[0], argv[0]);
-- return (-1);
-- }
--
-- if (!strcmp(argv[1], "start")) {
-- /* disable */
-- if (lwt_control(0, 0) != 0)
-- return (-1);
--
-- /* clear */
-- if (lwt_control(0, 1) != 0)
-- return (-1);
--
-- /* enable */
-- if (lwt_control(1, 0) != 0)
-- return (-1);
--
-- return (0);
-- }
--
-- if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
-- return (-1);
--
- if (ncpus > LWT_MAX_CPUS) {
- if (ncpus > lwt_max_cpus) {
-- fprintf(stderr, "Too many cpus: %d (%d)\n",
- ncpus, LWT_MAX_CPUS);
- ncpus, lwt_max_cpus);
-- return (-1);
-- }
--
-- events = (lwt_event_t *)malloc(totalspace);
-- if (events == NULL) {
-- fprintf(stderr, "Can't allocate %d\n", totalspace);
-- return (-1);
-- }
--
-- if (lwt_control(0, 0) != 0) { /* disable */
-- free(events);
-- return (-1);
-- }
--
-- if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
-- free(events);
-- return (-1);
-- }
--
-- /* we want this time to be sampled at snapshot time */
-- gettimeofday(&tvnow, NULL);
--
-- if (argc > 2) {
-- f = fopen (argv[2], "w");
-- if (f == NULL) {
-- fprintf(stderr, "Can't open %s for writing: %s\n", argv[2], strerror (errno));
-- free(events);
-- return (-1);
-- }
-- }
--
-- mhz = get_cycles_per_usec();
--
-- /* carve events into per-cpu slices */
-- nevents_per_cpu = totalspace / (ncpus * sizeof(lwt_event_t));
-- for (cpu = 0; cpu <= ncpus; cpu++)
-- cpu_event[cpu] = &events[cpu * nevents_per_cpu];
--
-- /* find the earliest event on each cpu */
-- for (cpu = 0; cpu < ncpus; cpu++) {
-- first_event[cpu] = NULL;
--
-- for (e = cpu_event[cpu]; e < cpu_event[cpu + 1]; e++) {
--
-- if (e->lwte_where == NULL) /* not an event */
-- continue;
--
-- if (first_event[cpu] == NULL ||
-- first_event[cpu]->lwte_when > e->lwte_when)
-- first_event[cpu] = e;
-- }
--
-- next_event[cpu] = first_event[cpu];
-- }
--
-- t0 = tlast = 0;
-- for (cpu = 0; cpu < ncpus; cpu++) {
-- e = first_event[cpu];
-- if (e == NULL) /* no events this cpu */
-- continue;
--
-- if (e == cpu_event[cpu])
-- e = cpu_event[cpu + 1] - 1;
-- else
-- e = e - 1;
--
-- /* If there's an event immediately before the first one, this
-- * cpu wrapped its event buffer */
-- if (e->lwte_where == NULL)
-- continue;
--
-- /* We should only start outputting events from the most recent
-- * first event in any wrapped cpu. Events before this time on
-- * other cpus won't have any events from this CPU to interleave
-- * with. */
-- if (t0 < first_event[cpu]->lwte_when)
-- t0 = first_event[cpu]->lwte_when;
-- }
--
-- for (;;) {
-- /* find which cpu has the next event */
-- cpu = -1;
-- for (i = 0; i < ncpus; i++) {
--
-- if (next_event[i] == NULL) /* this cpu exhausted */
-- continue;
--
-- if (cpu < 0 ||
-- next_event[i]->lwte_when < next_event[cpu]->lwte_when)
-- cpu = i;
-- }
--
-- if (cpu < 0) /* all cpus exhausted */
-- break;
--
-- if (t0 == 0) {
-- /* no wrapped cpus and this is he first ever event */
-- t0 = next_event[cpu]->lwte_when;
-- }
--
-- if (t0 <= next_event[cpu]->lwte_when) {
-- /* on or after the first event */
-- if (!printed_date) {
-- cycles_t du = (tnow - t0) / mhz;
-- time_t then = tvnow.tv_sec - du/1000000;
--
-- if (du % 1000000 > tvnow.tv_usec)
-- then--;
--
-- fprintf(f, "%s", ctime(&then));
-- printed_date = 1;
-- }
--
-- rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
-- if (rc != 0)
-- break;
-
- if (++nlines % 10000 == 0 && f != stdout) {
- /* show some activity... */
- printf(".");
- fflush (stdout);
- }
-- }
--
-- tlast = next_event[cpu]->lwte_when;
--
-- next_event[cpu]++;
-- if (next_event[cpu] == cpu_event[cpu + 1])
-- next_event[cpu] = cpu_event[cpu];
--
-- if (next_event[cpu]->lwte_where == NULL ||
-- next_event[cpu] == first_event[cpu])
-- next_event[cpu] = NULL;
-- }
--
- if (f != stdout)
- if (f != stdout) {
- printf("\n");
-- fclose(f);
- }
--
-- free(events);
-- return (0);
--}
--
--int jt_ptl_memhog(int argc, char **argv)
--{
-- static int gfp = 0; /* sticky! */
--
-- struct portal_ioctl_data data;
-- int rc;
-- int count;
-- char *end;
--
-- if (argc < 2) {
-- fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
-- return 0;
-- }
--
-- count = strtol(argv[1], &end, 0);
-- if (count < 0 || *end != 0) {
-- fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
-- return -1;
-- }
--
-- if (argc >= 3) {
-- rc = strtol(argv[2], &end, 0);
-- if (*end != 0) {
-- fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
-- return -1;
-- }
-- gfp = rc;
-- }
--
-- PORTAL_IOC_INIT(data);
-- data.ioc_count = count;
-- data.ioc_flags = gfp;
-- rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data);
--
-- if (rc != 0) {
-- fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
-- return -1;
-- }
--
-- printf("memhog %d OK\n", count);
-- return 0;
--}
--
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- *
-- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-- *
-- * This file is part of Portals, http://www.sf.net/projects/lustre/
-- *
-- * Portals is free software; you can redistribute it and/or
-- * modify it under the terms of version 2 of the GNU General Public
-- * License as published by the Free Software Foundation.
-- *
-- * Portals is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with Portals; if not, write to the Free Software
-- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-- *
-- */
--
--#include <stdio.h>
--#include <stdlib.h>
--#include <portals/api-support.h>
--#include <portals/ptlctl.h>
--
--#include "parser.h"
--
--
--command_t list[] = {
-- {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"},
- {"print_autoconns", jt_ptl_print_autoconnects, 0, "print autoconnect entries (no args)"},
- {"add_autoconn", jt_ptl_add_autoconnect, 0, "add autoconnect entry (args: nid host [ise])"},
- {"del_autoconn", jt_ptl_del_autoconnect, 0, "delete autoconnect entry (args: [nid] [host] [ks])"},
- {"print_interfaces", jt_ptl_print_interfaces, 0, "print interface entries (no args)"},
- {"add_interface", jt_ptl_add_interface, 0, "add interface entry (args: ip [netmask])"},
- {"del_interface", jt_ptl_del_interface, 0, "delete interface entries (args: [ip])"},
- {"print_peers", jt_ptl_print_peers, 0, "print peer entries (no args)"},
- {"add_peer", jt_ptl_add_peer, 0, "add peer entry (args: nid host port)"},
- {"del_peer", jt_ptl_del_peer, 0, "delete peer entry (args: [nid] [host])"},
-- {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"},
-- {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [iIOC])"},
-- {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"},
-- {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"},
-- {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"},
-- {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"},
-- {"shownid", jt_ptl_shownid, 0, "print the local NID"},
-- {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"},
-- {"add_route", jt_ptl_add_route, 0,
-- "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"},
-- {"del_route", jt_ptl_del_route, 0,
-- "delete all routes via a gateway from the routing table (args: gatewayNID"},
-- {"set_route", jt_ptl_notify_router, 0,
-- "enable/disable a route in the routing table (args: gatewayNID up/down [time]"},
-- {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"},
- {"recv_mem", jt_ptl_rxmem, 0, "Set socket receive buffer size (args: [size])"},
- {"send_mem", jt_ptl_txmem, 0, "Set socket send buffer size (args: [size])"},
- {"nagle", jt_ptl_nagle, 0, "Enable/Disable Nagle (args: [on/off])"},
-- {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
-- {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"},
-- {"help", Parser_help, 0, "help"},
-- {"exit", Parser_quit, 0, "quit"},
-- {"quit", Parser_quit, 0, "quit"},
-- { 0, 0, 0, NULL }
--};
--
--int main(int argc, char **argv)
--{
-- if (ptl_initialize(argc, argv) < 0)
-- exit(1);
--
-- Parser_init("ptlctl > ", list);
-- if (argc > 1)
-- return Parser_execarg(argc - 1, &argv[1], list);
--
-- Parser_commands();
--
-- return 0;
--}
+++ /dev/null
--#include <stdio.h>
--#include <errno.h>
--#include <string.h>
--#include <fcntl.h>
--#include <unistd.h>
--#include <stdlib.h>
--#include <sys/types.h>
--#include <sys/time.h>
--
--double
--timenow ()
--{
-- struct timeval tv;
--
-- gettimeofday (&tv, NULL);
-- return (tv.tv_sec + tv.tv_usec / 1000000.0);
--}
--
--void
--do_stat (int fd)
--{
-- static char buffer[1024];
-- static double last = 0.0;
-- static unsigned long long old_bytes;
-- static unsigned long old_packets;
-- static unsigned long old_errors;
-- double now;
-- double t;
-- unsigned long long new_bytes, bytes;
-- unsigned long new_packets, packets;
-- unsigned long new_errors, errors;
-- unsigned long depth;
-- int n;
--
-- lseek (fd, 0, SEEK_SET);
-- now = timenow();
-- n = read (fd, buffer, sizeof (buffer));
-- if (n < 0)
-- {
-- fprintf (stderr, "Can't read statfile\n");
-- exit (1);
-- }
-- buffer[n] = 0;
--
-- n = sscanf (buffer, "%Lu %lu %lu %lu",
-- &new_bytes, &new_packets, &new_errors, &depth);
--
-- if (n < 3)
-- {
-- fprintf (stderr, "Can't parse statfile\n");
-- exit (1);
-- }
--
-- if (last == 0.0)
-- printf ("%llu bytes, %lu packets (sz %lld), %lu errors",
-- new_bytes, new_packets,
-- (long long)((new_packets == 0) ? 0LL : new_bytes/new_packets),
-- new_errors);
-- else
-- {
-- t = now - last;
--
-- if (new_bytes < old_bytes)
-- bytes = -1ULL - old_bytes + new_bytes + 1;
-- else
-- bytes = new_bytes - old_bytes;
-- if (new_packets < old_packets)
-- packets = -1UL - old_packets + new_packets + 1;
-- else
-- packets = new_packets - old_packets;
-- if (new_errors < old_errors)
-- errors = -1UL - old_errors + new_errors + 1;
-- else
-- errors = new_errors - old_errors;
--
-- printf ("%9llu bytes (%7.2fMb/s), %7lu packets (sz %5lld, %5ld/s), %lu errors (%ld/s)",
-- bytes, ((double)bytes)/((1<<20) * t),
-- packets, (long long)((packets == 0) ? 0LL : bytes/packets), (long)(packets/t),
-- errors, (long)(errors/t));
-- }
-- old_bytes = new_bytes;
-- old_packets = new_packets;
-- old_errors = new_errors;
--
-- if (n == 4)
-- printf (", depth (%ld)\n", depth);
-- else
-- printf ("\n");
--
-- fflush (stdout);
--
-- lseek (fd, 0, SEEK_SET);
-- last = timenow();
--}
--
--int main (int argc, char **argv)
--{
-- int interval = 0;
-- int fd;
--
-- if (argc > 1)
-- interval = atoi (argv[1]);
--
-- fd = open ("/proc/sys/portals/router", O_RDONLY);
-- if (fd < 0)
-- {
-- fprintf (stderr, "Can't open stat: %s\n", strerror (errno));
-- return (1);
-- }
--
-- do_stat (fd);
-- if (interval == 0)
-- return (0);
--
-- for (;;)
-- {
-- sleep (interval);
-- do_stat (fd);
-- }
--}
+++ /dev/null
--/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-- * vim:expandtab:shiftwidth=8:tabstop=8:
-- */
--#include <stdio.h>
--#include <sys/types.h>
--#include <sys/wait.h>
--#include <portals/api-support.h>
--#include <portals/list.h>
--#include <portals/lib-types.h>
--
--extern size_t strnlen(const char *, size_t);
--
--#define BLANK_LINE() \
--do { \
-- printf ("\n"); \
--} while (0)
--
--#define COMMENT(c) \
--do { \
-- printf (" /* "c" */\n"); \
--} while (0)
--
--#define STRINGIFY(a) #a
--
--#define CHECK_DEFINE(a) \
--do { \
-- printf (" LASSERT ("#a" == "STRINGIFY(a)");\n"); \
--} while (0)
--
--#define CHECK_VALUE(a) \
--do { \
-- printf (" LASSERT ("#a" == %d);\n", a); \
--} while (0)
--
--#define CHECK_MEMBER_OFFSET(s,m) \
--do { \
- CHECK_VALUE(offsetof(s, m)); \
- CHECK_VALUE((int)offsetof(s, m)); \
--} while (0)
--
--#define CHECK_MEMBER_SIZEOF(s,m) \
--do { \
-- CHECK_VALUE((int)sizeof(((s *)0)->m)); \
--} while (0)
--
--#define CHECK_MEMBER(s,m) \
--do { \
-- CHECK_MEMBER_OFFSET(s, m); \
-- CHECK_MEMBER_SIZEOF(s, m); \
--} while (0)
--
--#define CHECK_STRUCT(s) \
--do { \
-- BLANK_LINE (); \
-- COMMENT ("Checks for struct "#s); \
-- CHECK_VALUE((int)sizeof(s)); \
--} while (0)
--
--void
--check_ptl_handle_wire (void)
--{
-- CHECK_STRUCT (ptl_handle_wire_t);
-- CHECK_MEMBER (ptl_handle_wire_t, wh_interface_cookie);
-- CHECK_MEMBER (ptl_handle_wire_t, wh_object_cookie);
--}
--
--void
--check_ptl_magicversion (void)
--{
-- CHECK_STRUCT (ptl_magicversion_t);
-- CHECK_MEMBER (ptl_magicversion_t, magic);
-- CHECK_MEMBER (ptl_magicversion_t, version_major);
-- CHECK_MEMBER (ptl_magicversion_t, version_minor);
--}
--
--void
--check_ptl_hdr (void)
--{
-- CHECK_STRUCT (ptl_hdr_t);
-- CHECK_MEMBER (ptl_hdr_t, dest_nid);
-- CHECK_MEMBER (ptl_hdr_t, src_nid);
-- CHECK_MEMBER (ptl_hdr_t, dest_pid);
-- CHECK_MEMBER (ptl_hdr_t, src_pid);
-- CHECK_MEMBER (ptl_hdr_t, type);
-- CHECK_MEMBER (ptl_hdr_t, payload_length);
-- CHECK_MEMBER (ptl_hdr_t, msg);
--
-- BLANK_LINE ();
-- COMMENT ("Ack");
-- CHECK_MEMBER (ptl_hdr_t, msg.ack.dst_wmd);
-- CHECK_MEMBER (ptl_hdr_t, msg.ack.match_bits);
-- CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength);
--
-- BLANK_LINE ();
-- COMMENT ("Put");
-- CHECK_MEMBER (ptl_hdr_t, msg.put.ack_wmd);
-- CHECK_MEMBER (ptl_hdr_t, msg.put.match_bits);
-- CHECK_MEMBER (ptl_hdr_t, msg.put.hdr_data);
-- CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index);
-- CHECK_MEMBER (ptl_hdr_t, msg.put.offset);
--
-- BLANK_LINE ();
-- COMMENT ("Get");
-- CHECK_MEMBER (ptl_hdr_t, msg.get.return_wmd);
-- CHECK_MEMBER (ptl_hdr_t, msg.get.match_bits);
-- CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index);
-- CHECK_MEMBER (ptl_hdr_t, msg.get.src_offset);
-- CHECK_MEMBER (ptl_hdr_t, msg.get.sink_length);
--
-- BLANK_LINE ();
-- COMMENT ("Reply");
-- CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd);
--
-- BLANK_LINE ();
-- COMMENT ("Hello");
-- CHECK_MEMBER (ptl_hdr_t, msg.hello.incarnation);
-- CHECK_MEMBER (ptl_hdr_t, msg.hello.type);
--}
--
--void
--system_string (char *cmdline, char *str, int len)
--{
-- int fds[2];
-- int rc;
-- pid_t pid;
--
-- rc = pipe (fds);
-- if (rc != 0)
-- abort ();
--
-- pid = fork ();
-- if (pid == 0) {
-- /* child */
-- int fd = fileno(stdout);
--
-- rc = dup2(fds[1], fd);
-- if (rc != fd)
-- abort();
--
-- exit(system(cmdline));
-- /* notreached */
-- } else if ((int)pid < 0) {
-- abort();
-- } else {
-- FILE *f = fdopen (fds[0], "r");
--
-- if (f == NULL)
-- abort();
--
-- close(fds[1]);
--
-- if (fgets(str, len, f) == NULL)
-- abort();
--
-- if (waitpid(pid, &rc, 0) != pid)
-- abort();
--
-- if (!WIFEXITED(rc) ||
-- WEXITSTATUS(rc) != 0)
-- abort();
--
-- if (strnlen(str, len) == len)
-- str[len - 1] = 0;
--
-- if (str[strlen(str) - 1] == '\n')
-- str[strlen(str) - 1] = 0;
--
-- fclose(f);
-- }
--}
--
--int
--main (int argc, char **argv)
--{
-- char unameinfo[80];
-- char gccinfo[80];
--
-- system_string("uname -a", unameinfo, sizeof(unameinfo));
-- system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo));
--
-- printf ("void lib_assert_wire_constants (void)\n"
-- "{\n"
-- " /* Wire protocol assertions generated by 'wirecheck'\n"
-- " * running on %s\n"
-- " * with %s */\n"
-- "\n", unameinfo, gccinfo);
--
-- BLANK_LINE ();
--
-- COMMENT ("Constants...");
-- CHECK_DEFINE (PORTALS_PROTO_MAGIC);
-- CHECK_DEFINE (PORTALS_PROTO_VERSION_MAJOR);
-- CHECK_DEFINE (PORTALS_PROTO_VERSION_MINOR);
--
-- CHECK_VALUE (PTL_MSG_ACK);
-- CHECK_VALUE (PTL_MSG_PUT);
-- CHECK_VALUE (PTL_MSG_GET);
-- CHECK_VALUE (PTL_MSG_REPLY);
-- CHECK_VALUE (PTL_MSG_HELLO);
--
-- check_ptl_handle_wire ();
-- check_ptl_magicversion ();
-- check_ptl_hdr ();
--
-- printf ("}\n\n");
--
-- return (0);
--}